diff --git "a/experiment_config.json" "b/experiment_config.json" new file mode 100644--- /dev/null +++ "b/experiment_config.json" @@ -0,0 +1,223362 @@ +{ + "training_args": { + "output_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/qa_commonsense_qa_ff_v2", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": true, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 4, + "per_device_eval_batch_size": 8, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 4, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 2e-05, + "weight_decay": 0.0, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3, + "max_steps": -1, + "lr_scheduler_type": "linear", + "lr_scheduler_kwargs": {}, + "warmup_ratio": 0.0, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/qa_commonsense_qa_ff_v2/runs/Oct01_03-20-41_gx13", + "logging_strategy": "steps", + "logging_first_step": false, + "logging_steps": 20, + "logging_nan_inf_filter": true, + "save_strategy": "epoch", + "save_steps": 500, + "save_total_limit": null, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "seed": 42, + "data_seed": null, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": false, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": [], + "dataloader_drop_last": false, + "eval_steps": 152, + "dataloader_num_workers": 0, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/qa_commonsense_qa_ff_v2", + "disable_tqdm": false, + "remove_unused_columns": true, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": null, + "greater_is_better": null, + "ignore_data_skip": false, + "fsdp": [], + "fsdp_min_num_params": 0, + "fsdp_config": { + "min_num_params": 0, + "xla": false, + "xla_fsdp_v2": false, + "xla_fsdp_grad_ckpt": false + }, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "split_batches": false, + "dispatch_batches": null, + "even_batches": true, + "use_seedable_sampler": true, + "non_blocking": false, + "gradient_accumulation_kwargs": null + }, + "deepspeed": null, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_token": "", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": false, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": "", + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false + }, + "lora_config": null, + "flops": { + "eval": 9808423376665600, + "train": 20896705064436048, + "total": 30705128441101648 + }, + "total": { + "total": 141127.95772, + "train": 109959.72768000001, + "eval": 31168.230040000002 + }, + "logs": [ + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:20:53.013969", + "step": 0, + "epoch": 0 + }, + { + "type": "pplx", + "content": 68890406.29865518, + "timestamp": "2025-10-01 03:20:53.029503", + "step": 0, + "epoch": 0 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.109610", + "step": 0, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.8018559813499451, + "timestamp": "2025-10-01 03:20:53.112660", + "step": 1, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.151495", + "step": 1, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.9373461008071899, + "timestamp": "2025-10-01 03:20:53.160318", + "step": 2, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.197813", + "step": 2, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.959297239780426, + "timestamp": "2025-10-01 03:20:53.205063", + "step": 3, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.243779", + "step": 3, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.8432273268699646, + "timestamp": "2025-10-01 03:20:53.310189", + "step": 4, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.348090", + "step": 4, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07753092795610428, + "timestamp": "2025-10-01 03:20:53.351318", + "step": 5, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.392978", + "step": 5, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08649538457393646, + "timestamp": "2025-10-01 03:20:53.403425", + "step": 6, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.450004", + "step": 6, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08204222470521927, + "timestamp": "2025-10-01 03:20:53.457769", + "step": 7, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.505219", + "step": 7, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09675628691911697, + "timestamp": "2025-10-01 03:20:53.533467", + "step": 8, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:53.598888", + "step": 8, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05419256165623665, + "timestamp": "2025-10-01 03:20:53.605230", + "step": 9, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:53.675139", + "step": 9, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051280390471220016, + "timestamp": "2025-10-01 03:20:53.682259", + "step": 10, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.729148", + "step": 10, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05542677268385887, + "timestamp": "2025-10-01 03:20:53.738562", + "step": 11, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.777784", + "step": 11, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05907134339213371, + "timestamp": "2025-10-01 03:20:53.806640", + "step": 12, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:53.854639", + "step": 12, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0645960122346878, + "timestamp": "2025-10-01 03:20:53.861968", + "step": 13, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.903839", + "step": 13, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05337533354759216, + "timestamp": "2025-10-01 03:20:53.914050", + "step": 14, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:53.953326", + "step": 14, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03405570611357689, + "timestamp": "2025-10-01 03:20:53.962277", + "step": 15, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.010356", + "step": 15, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04106346517801285, + "timestamp": "2025-10-01 03:20:54.041481", + "step": 16, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.088783", + "step": 16, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05952487513422966, + "timestamp": "2025-10-01 03:20:54.097636", + "step": 17, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:54.135538", + "step": 17, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03962479159235954, + "timestamp": "2025-10-01 03:20:54.144856", + "step": 18, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.191640", + "step": 18, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08343984186649323, + "timestamp": "2025-10-01 03:20:54.202732", + "step": 19, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.244604", + "step": 19, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05129793658852577, + "timestamp": "2025-10-01 03:20:54.275264", + "step": 20, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:54.311833", + "step": 20, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052805740386247635, + "timestamp": "2025-10-01 03:20:54.318226", + "step": 21, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.365941", + "step": 21, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05853865295648575, + "timestamp": "2025-10-01 03:20:54.381351", + "step": 22, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.424938", + "step": 22, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04577627405524254, + "timestamp": "2025-10-01 03:20:54.442880", + "step": 23, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.491172", + "step": 23, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04199828580021858, + "timestamp": "2025-10-01 03:20:54.522416", + "step": 24, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.563183", + "step": 24, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04401063919067383, + "timestamp": "2025-10-01 03:20:54.578065", + "step": 25, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.629190", + "step": 25, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043335702270269394, + "timestamp": "2025-10-01 03:20:54.639699", + "step": 26, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:54.685503", + "step": 26, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04798820987343788, + "timestamp": "2025-10-01 03:20:54.701866", + "step": 27, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.754826", + "step": 27, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04261348769068718, + "timestamp": "2025-10-01 03:20:54.790097", + "step": 28, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.838934", + "step": 28, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02256474457681179, + "timestamp": "2025-10-01 03:20:54.849320", + "step": 29, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.887413", + "step": 29, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05060632899403572, + "timestamp": "2025-10-01 03:20:54.893194", + "step": 30, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:54.942781", + "step": 30, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03186265751719475, + "timestamp": "2025-10-01 03:20:54.959291", + "step": 31, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.003232", + "step": 31, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046790122985839844, + "timestamp": "2025-10-01 03:20:55.039599", + "step": 32, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.089405", + "step": 32, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.056181006133556366, + "timestamp": "2025-10-01 03:20:55.102967", + "step": 33, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:55.144773", + "step": 33, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0641719326376915, + "timestamp": "2025-10-01 03:20:55.160286", + "step": 34, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.206931", + "step": 34, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04143786057829857, + "timestamp": "2025-10-01 03:20:55.218992", + "step": 35, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.270451", + "step": 35, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0385771170258522, + "timestamp": "2025-10-01 03:20:55.301917", + "step": 36, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.351301", + "step": 36, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03372429683804512, + "timestamp": "2025-10-01 03:20:55.366453", + "step": 37, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.412218", + "step": 37, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03956858441233635, + "timestamp": "2025-10-01 03:20:55.428172", + "step": 38, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.474732", + "step": 38, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020423339679837227, + "timestamp": "2025-10-01 03:20:55.491571", + "step": 39, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.543322", + "step": 39, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039162881672382355, + "timestamp": "2025-10-01 03:20:55.582221", + "step": 40, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.632450", + "step": 40, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04644813761115074, + "timestamp": "2025-10-01 03:20:55.640130", + "step": 41, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:55.680519", + "step": 41, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03708171844482422, + "timestamp": "2025-10-01 03:20:55.691308", + "step": 42, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.735559", + "step": 42, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04152229055762291, + "timestamp": "2025-10-01 03:20:55.750643", + "step": 43, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.797158", + "step": 43, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08285709470510483, + "timestamp": "2025-10-01 03:20:55.834777", + "step": 44, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:55.879520", + "step": 44, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028124229982495308, + "timestamp": "2025-10-01 03:20:55.897594", + "step": 45, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:55.940378", + "step": 45, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05854777619242668, + "timestamp": "2025-10-01 03:20:55.957623", + "step": 46, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.003645", + "step": 46, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03759978339076042, + "timestamp": "2025-10-01 03:20:56.019830", + "step": 47, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.062857", + "step": 47, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03626880422234535, + "timestamp": "2025-10-01 03:20:56.099133", + "step": 48, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.137227", + "step": 48, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05131187289953232, + "timestamp": "2025-10-01 03:20:56.140558", + "step": 49, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:56.181367", + "step": 49, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04723525792360306, + "timestamp": "2025-10-01 03:20:56.196670", + "step": 50, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:56.232701", + "step": 50, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.095095194876194, + "timestamp": "2025-10-01 03:20:56.241410", + "step": 51, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:20:56.286341", + "step": 51, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01704883761703968, + "timestamp": "2025-10-01 03:20:56.316152", + "step": 52, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:56.358411", + "step": 52, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0912850871682167, + "timestamp": "2025-10-01 03:20:56.367128", + "step": 53, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.405516", + "step": 53, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017210720106959343, + "timestamp": "2025-10-01 03:20:56.414609", + "step": 54, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.450895", + "step": 54, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03516107425093651, + "timestamp": "2025-10-01 03:20:56.459392", + "step": 55, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:56.498524", + "step": 55, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055034179240465164, + "timestamp": "2025-10-01 03:20:56.530136", + "step": 56, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.572670", + "step": 56, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008457333780825138, + "timestamp": "2025-10-01 03:20:56.580720", + "step": 57, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.617978", + "step": 57, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04227998107671738, + "timestamp": "2025-10-01 03:20:56.623968", + "step": 58, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:56.668906", + "step": 58, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06775923818349838, + "timestamp": "2025-10-01 03:20:56.677148", + "step": 59, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.714352", + "step": 59, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07734692096710205, + "timestamp": "2025-10-01 03:20:56.743651", + "step": 60, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.782504", + "step": 60, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0151920635253191, + "timestamp": "2025-10-01 03:20:56.786975", + "step": 61, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:56.826507", + "step": 61, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04924556985497475, + "timestamp": "2025-10-01 03:20:56.836898", + "step": 62, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.874663", + "step": 62, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0562850721180439, + "timestamp": "2025-10-01 03:20:56.882551", + "step": 63, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.922067", + "step": 63, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04169496148824692, + "timestamp": "2025-10-01 03:20:56.953307", + "step": 64, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:56.995035", + "step": 64, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06484655290842056, + "timestamp": "2025-10-01 03:20:56.999348", + "step": 65, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.034955", + "step": 65, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02631377801299095, + "timestamp": "2025-10-01 03:20:57.043095", + "step": 66, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.080800", + "step": 66, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026904184371232986, + "timestamp": "2025-10-01 03:20:57.088097", + "step": 67, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.121766", + "step": 67, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07582578808069229, + "timestamp": "2025-10-01 03:20:57.151043", + "step": 68, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:57.190352", + "step": 68, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.11687595397233963, + "timestamp": "2025-10-01 03:20:57.197459", + "step": 69, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.238952", + "step": 69, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03880123421549797, + "timestamp": "2025-10-01 03:20:57.249315", + "step": 70, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.296596", + "step": 70, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055843256413936615, + "timestamp": "2025-10-01 03:20:57.304862", + "step": 71, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:57.349014", + "step": 71, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01453090738505125, + "timestamp": "2025-10-01 03:20:57.381084", + "step": 72, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.434381", + "step": 72, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0484919548034668, + "timestamp": "2025-10-01 03:20:57.444393", + "step": 73, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.500999", + "step": 73, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06835059076547623, + "timestamp": "2025-10-01 03:20:57.505743", + "step": 74, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:57.544947", + "step": 74, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04029897600412369, + "timestamp": "2025-10-01 03:20:57.553796", + "step": 75, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.597482", + "step": 75, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04682263731956482, + "timestamp": "2025-10-01 03:20:57.625260", + "step": 76, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.671026", + "step": 76, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07231863588094711, + "timestamp": "2025-10-01 03:20:57.676169", + "step": 77, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.733499", + "step": 77, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07678253203630447, + "timestamp": "2025-10-01 03:20:57.742443", + "step": 78, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:57.801012", + "step": 78, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08190608024597168, + "timestamp": "2025-10-01 03:20:57.811973", + "step": 79, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:57.941005", + "step": 79, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0384153388440609, + "timestamp": "2025-10-01 03:20:57.973095", + "step": 80, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:20:58.024544", + "step": 80, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03881394863128662, + "timestamp": "2025-10-01 03:20:58.033554", + "step": 81, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.092823", + "step": 81, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06570108979940414, + "timestamp": "2025-10-01 03:20:58.104441", + "step": 82, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.177610", + "step": 82, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055005334317684174, + "timestamp": "2025-10-01 03:20:58.187777", + "step": 83, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.229949", + "step": 83, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04427757486701012, + "timestamp": "2025-10-01 03:20:58.260150", + "step": 84, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.295883", + "step": 84, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03531666472554207, + "timestamp": "2025-10-01 03:20:58.305781", + "step": 85, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.350756", + "step": 85, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03008469007909298, + "timestamp": "2025-10-01 03:20:58.354219", + "step": 86, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.397650", + "step": 86, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06379244476556778, + "timestamp": "2025-10-01 03:20:58.401789", + "step": 87, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.440523", + "step": 87, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04317265376448631, + "timestamp": "2025-10-01 03:20:58.471004", + "step": 88, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.506763", + "step": 88, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025248361751437187, + "timestamp": "2025-10-01 03:20:58.510105", + "step": 89, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:58.552208", + "step": 89, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052514445036649704, + "timestamp": "2025-10-01 03:20:58.563849", + "step": 90, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:58.609781", + "step": 90, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03233317658305168, + "timestamp": "2025-10-01 03:20:58.616906", + "step": 91, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.658064", + "step": 91, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027389157563447952, + "timestamp": "2025-10-01 03:20:58.688192", + "step": 92, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.732814", + "step": 92, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03284407779574394, + "timestamp": "2025-10-01 03:20:58.743639", + "step": 93, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.786922", + "step": 93, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04856187477707863, + "timestamp": "2025-10-01 03:20:58.794883", + "step": 94, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.840639", + "step": 94, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04502064362168312, + "timestamp": "2025-10-01 03:20:58.849308", + "step": 95, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.882547", + "step": 95, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04484691843390465, + "timestamp": "2025-10-01 03:20:58.916418", + "step": 96, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:58.956123", + "step": 96, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04559338837862015, + "timestamp": "2025-10-01 03:20:58.962314", + "step": 97, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:58.995757", + "step": 97, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03556380420923233, + "timestamp": "2025-10-01 03:20:59.004163", + "step": 98, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.037206", + "step": 98, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041196729987859726, + "timestamp": "2025-10-01 03:20:59.048255", + "step": 99, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:59.090627", + "step": 99, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03655094653367996, + "timestamp": "2025-10-01 03:20:59.121686", + "step": 100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.163364", + "step": 100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02651095949113369, + "timestamp": "2025-10-01 03:20:59.173945", + "step": 101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.210444", + "step": 101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025533542037010193, + "timestamp": "2025-10-01 03:20:59.218963", + "step": 102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:20:59.256221", + "step": 102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04308776929974556, + "timestamp": "2025-10-01 03:20:59.264262", + "step": 103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.303763", + "step": 103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04111707955598831, + "timestamp": "2025-10-01 03:20:59.335051", + "step": 104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:59.373781", + "step": 104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01739845797419548, + "timestamp": "2025-10-01 03:20:59.377889", + "step": 105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.411407", + "step": 105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035268209874629974, + "timestamp": "2025-10-01 03:20:59.421642", + "step": 106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.465497", + "step": 106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034303709864616394, + "timestamp": "2025-10-01 03:20:59.476477", + "step": 107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.513667", + "step": 107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05243156477808952, + "timestamp": "2025-10-01 03:20:59.544346", + "step": 108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.584656", + "step": 108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029031913727521896, + "timestamp": "2025-10-01 03:20:59.593677", + "step": 109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.633607", + "step": 109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05792639032006264, + "timestamp": "2025-10-01 03:20:59.640855", + "step": 110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:59.681285", + "step": 110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05424019694328308, + "timestamp": "2025-10-01 03:20:59.689320", + "step": 111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.734928", + "step": 111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0430477000772953, + "timestamp": "2025-10-01 03:20:59.766290", + "step": 112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.807469", + "step": 112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03618874400854111, + "timestamp": "2025-10-01 03:20:59.817325", + "step": 113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:20:59.856183", + "step": 113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.062492627650499344, + "timestamp": "2025-10-01 03:20:59.868334", + "step": 114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.905942", + "step": 114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06153668090701103, + "timestamp": "2025-10-01 03:20:59.915484", + "step": 115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:20:59.957843", + "step": 115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04676983132958412, + "timestamp": "2025-10-01 03:20:59.987784", + "step": 116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:00.024422", + "step": 116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04995122179389, + "timestamp": "2025-10-01 03:21:00.033112", + "step": 117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.070072", + "step": 117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031583596020936966, + "timestamp": "2025-10-01 03:21:00.083825", + "step": 118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.136179", + "step": 118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03743937611579895, + "timestamp": "2025-10-01 03:21:00.151355", + "step": 119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:00.191506", + "step": 119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031546588987112045, + "timestamp": "2025-10-01 03:21:00.225380", + "step": 120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.270029", + "step": 120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02248348481953144, + "timestamp": "2025-10-01 03:21:00.273636", + "step": 121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.316522", + "step": 121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030099431052803993, + "timestamp": "2025-10-01 03:21:00.327035", + "step": 122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.366496", + "step": 122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05775203928351402, + "timestamp": "2025-10-01 03:21:00.380096", + "step": 123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.419946", + "step": 123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040778081864118576, + "timestamp": "2025-10-01 03:21:00.450873", + "step": 124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.488761", + "step": 124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06953594833612442, + "timestamp": "2025-10-01 03:21:00.497907", + "step": 125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.535257", + "step": 125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02045215293765068, + "timestamp": "2025-10-01 03:21:00.546529", + "step": 126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.583785", + "step": 126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0454011969268322, + "timestamp": "2025-10-01 03:21:00.592766", + "step": 127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.630302", + "step": 127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038553494960069656, + "timestamp": "2025-10-01 03:21:00.661150", + "step": 128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.693273", + "step": 128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06179556995630264, + "timestamp": "2025-10-01 03:21:00.703662", + "step": 129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.740902", + "step": 129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028852978721261024, + "timestamp": "2025-10-01 03:21:00.753088", + "step": 130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:00.791264", + "step": 130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032880399376153946, + "timestamp": "2025-10-01 03:21:00.795119", + "step": 131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.832295", + "step": 131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02469795010983944, + "timestamp": "2025-10-01 03:21:00.862731", + "step": 132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.894221", + "step": 132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019833151251077652, + "timestamp": "2025-10-01 03:21:00.902167", + "step": 133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.939639", + "step": 133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01297033578157425, + "timestamp": "2025-10-01 03:21:00.950197", + "step": 134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:00.988839", + "step": 134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02837681956589222, + "timestamp": "2025-10-01 03:21:00.992821", + "step": 135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.025007", + "step": 135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021354641765356064, + "timestamp": "2025-10-01 03:21:01.050953", + "step": 136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:01.088725", + "step": 136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045004185289144516, + "timestamp": "2025-10-01 03:21:01.099164", + "step": 137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.137163", + "step": 137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015489950776100159, + "timestamp": "2025-10-01 03:21:01.145506", + "step": 138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:01.189506", + "step": 138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040904562920331955, + "timestamp": "2025-10-01 03:21:01.200060", + "step": 139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.238247", + "step": 139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015452435240149498, + "timestamp": "2025-10-01 03:21:01.263175", + "step": 140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.304869", + "step": 140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02486804686486721, + "timestamp": "2025-10-01 03:21:01.319482", + "step": 141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.361540", + "step": 141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06222965568304062, + "timestamp": "2025-10-01 03:21:01.376311", + "step": 142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.419568", + "step": 142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023580899462103844, + "timestamp": "2025-10-01 03:21:01.436582", + "step": 143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.482035", + "step": 143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02628621831536293, + "timestamp": "2025-10-01 03:21:01.517361", + "step": 144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.557059", + "step": 144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02305951714515686, + "timestamp": "2025-10-01 03:21:01.566729", + "step": 145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.608677", + "step": 145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02379428967833519, + "timestamp": "2025-10-01 03:21:01.623865", + "step": 146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.662949", + "step": 146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037490732967853546, + "timestamp": "2025-10-01 03:21:01.673882", + "step": 147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.713017", + "step": 147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005286350846290588, + "timestamp": "2025-10-01 03:21:01.745126", + "step": 148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.785759", + "step": 148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015965498983860016, + "timestamp": "2025-10-01 03:21:01.794996", + "step": 149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.830346", + "step": 149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035151101648807526, + "timestamp": "2025-10-01 03:21:01.840371", + "step": 150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:01.881465", + "step": 150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05009964853525162, + "timestamp": "2025-10-01 03:21:01.888855", + "step": 151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:01.923884", + "step": 151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02867976762354374, + "timestamp": "2025-10-01 03:21:01.959662", + "step": 152, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:21:04.173665", + "step": 152, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2371115.87404427, + "timestamp": "2025-10-01 03:21:04.183643", + "step": 152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.220307", + "step": 152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07416306436061859, + "timestamp": "2025-10-01 03:21:04.229440", + "step": 153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.266047", + "step": 153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0359635129570961, + "timestamp": "2025-10-01 03:21:04.275028", + "step": 154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:04.315805", + "step": 154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043954234570264816, + "timestamp": "2025-10-01 03:21:04.324385", + "step": 155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.366914", + "step": 155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07961083948612213, + "timestamp": "2025-10-01 03:21:04.397156", + "step": 156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:04.433282", + "step": 156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004218070302158594, + "timestamp": "2025-10-01 03:21:04.442996", + "step": 157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:04.482463", + "step": 157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006747664418071508, + "timestamp": "2025-10-01 03:21:04.491253", + "step": 158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.529052", + "step": 158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020197764039039612, + "timestamp": "2025-10-01 03:21:04.537750", + "step": 159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.577232", + "step": 159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05253985524177551, + "timestamp": "2025-10-01 03:21:04.606977", + "step": 160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.649588", + "step": 160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07358653098344803, + "timestamp": "2025-10-01 03:21:04.659561", + "step": 161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.696917", + "step": 161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021651098504662514, + "timestamp": "2025-10-01 03:21:04.701953", + "step": 162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.734765", + "step": 162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02886177785694599, + "timestamp": "2025-10-01 03:21:04.739260", + "step": 163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.772182", + "step": 163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009941846132278442, + "timestamp": "2025-10-01 03:21:04.798857", + "step": 164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:04.831638", + "step": 164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027086544781923294, + "timestamp": "2025-10-01 03:21:04.835840", + "step": 165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:04.868072", + "step": 165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06207410618662834, + "timestamp": "2025-10-01 03:21:04.870871", + "step": 166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:04.905548", + "step": 166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03746230527758598, + "timestamp": "2025-10-01 03:21:04.909862", + "step": 167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:04.941504", + "step": 167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03161172196269035, + "timestamp": "2025-10-01 03:21:04.966981", + "step": 168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:05.000389", + "step": 168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05450361594557762, + "timestamp": "2025-10-01 03:21:05.004110", + "step": 169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.036362", + "step": 169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035093698650598526, + "timestamp": "2025-10-01 03:21:05.042178", + "step": 170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.074119", + "step": 170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035559702664613724, + "timestamp": "2025-10-01 03:21:05.079262", + "step": 171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:05.111348", + "step": 171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04657293111085892, + "timestamp": "2025-10-01 03:21:05.137010", + "step": 172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.169255", + "step": 172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04297120124101639, + "timestamp": "2025-10-01 03:21:05.175515", + "step": 173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.209472", + "step": 173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040133409202098846, + "timestamp": "2025-10-01 03:21:05.215865", + "step": 174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.251839", + "step": 174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05311053618788719, + "timestamp": "2025-10-01 03:21:05.257397", + "step": 175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.292050", + "step": 175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033866036683321, + "timestamp": "2025-10-01 03:21:05.319157", + "step": 176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.354253", + "step": 176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040346793830394745, + "timestamp": "2025-10-01 03:21:05.358886", + "step": 177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.390634", + "step": 177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02014218457043171, + "timestamp": "2025-10-01 03:21:05.394600", + "step": 178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.427290", + "step": 178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004093941766768694, + "timestamp": "2025-10-01 03:21:05.432085", + "step": 179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.465268", + "step": 179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06743888556957245, + "timestamp": "2025-10-01 03:21:05.491371", + "step": 180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:05.523397", + "step": 180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005801581311970949, + "timestamp": "2025-10-01 03:21:05.528030", + "step": 181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.563070", + "step": 181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008720402605831623, + "timestamp": "2025-10-01 03:21:05.566676", + "step": 182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.598588", + "step": 182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06034347414970398, + "timestamp": "2025-10-01 03:21:05.601671", + "step": 183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:05.634898", + "step": 183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027213165536522865, + "timestamp": "2025-10-01 03:21:05.661626", + "step": 184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.693950", + "step": 184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05765910819172859, + "timestamp": "2025-10-01 03:21:05.697477", + "step": 185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.731078", + "step": 185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02430071495473385, + "timestamp": "2025-10-01 03:21:05.736106", + "step": 186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.773194", + "step": 186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030177028849720955, + "timestamp": "2025-10-01 03:21:05.777165", + "step": 187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.814472", + "step": 187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07561784982681274, + "timestamp": "2025-10-01 03:21:05.846203", + "step": 188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.879698", + "step": 188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03621077165007591, + "timestamp": "2025-10-01 03:21:05.883027", + "step": 189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.916821", + "step": 189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0242222398519516, + "timestamp": "2025-10-01 03:21:05.921475", + "step": 190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:05.956129", + "step": 190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05731888860464096, + "timestamp": "2025-10-01 03:21:05.967814", + "step": 191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.006123", + "step": 191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.061146873980760574, + "timestamp": "2025-10-01 03:21:06.034143", + "step": 192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.069647", + "step": 192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046887051314115524, + "timestamp": "2025-10-01 03:21:06.073864", + "step": 193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.107468", + "step": 193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03253694623708725, + "timestamp": "2025-10-01 03:21:06.110869", + "step": 194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.146508", + "step": 194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030570227652788162, + "timestamp": "2025-10-01 03:21:06.150332", + "step": 195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.186985", + "step": 195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03721844404935837, + "timestamp": "2025-10-01 03:21:06.219064", + "step": 196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:06.253198", + "step": 196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035361263900995255, + "timestamp": "2025-10-01 03:21:06.261950", + "step": 197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.298863", + "step": 197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05098510533571243, + "timestamp": "2025-10-01 03:21:06.303753", + "step": 198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.337126", + "step": 198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0763213187456131, + "timestamp": "2025-10-01 03:21:06.342980", + "step": 199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.376117", + "step": 199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06172468885779381, + "timestamp": "2025-10-01 03:21:06.402809", + "step": 200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.437543", + "step": 200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012934948317706585, + "timestamp": "2025-10-01 03:21:06.441248", + "step": 201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:06.480232", + "step": 201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02090090699493885, + "timestamp": "2025-10-01 03:21:06.484664", + "step": 202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.518310", + "step": 202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03670268505811691, + "timestamp": "2025-10-01 03:21:06.522922", + "step": 203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.556751", + "step": 203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03894392028450966, + "timestamp": "2025-10-01 03:21:06.582343", + "step": 204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.615107", + "step": 204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008799281902611256, + "timestamp": "2025-10-01 03:21:06.620261", + "step": 205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.652508", + "step": 205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0505724772810936, + "timestamp": "2025-10-01 03:21:06.656646", + "step": 206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.688925", + "step": 206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04091080650687218, + "timestamp": "2025-10-01 03:21:06.692631", + "step": 207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.725256", + "step": 207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03161032497882843, + "timestamp": "2025-10-01 03:21:06.751701", + "step": 208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.785577", + "step": 208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03986361250281334, + "timestamp": "2025-10-01 03:21:06.789707", + "step": 209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.824438", + "step": 209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020918849855661392, + "timestamp": "2025-10-01 03:21:06.828902", + "step": 210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.862587", + "step": 210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06269636005163193, + "timestamp": "2025-10-01 03:21:06.867453", + "step": 211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:06.901520", + "step": 211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026397576555609703, + "timestamp": "2025-10-01 03:21:06.927304", + "step": 212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:06.960945", + "step": 212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05206042900681496, + "timestamp": "2025-10-01 03:21:06.967096", + "step": 213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.000144", + "step": 213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05507514625787735, + "timestamp": "2025-10-01 03:21:07.003665", + "step": 214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:07.040000", + "step": 214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06109204888343811, + "timestamp": "2025-10-01 03:21:07.042425", + "step": 215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.083873", + "step": 215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026771023869514465, + "timestamp": "2025-10-01 03:21:07.111407", + "step": 216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.147084", + "step": 216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0236932635307312, + "timestamp": "2025-10-01 03:21:07.154123", + "step": 217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.189194", + "step": 217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04847824573516846, + "timestamp": "2025-10-01 03:21:07.196059", + "step": 218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.231792", + "step": 218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05679075047373772, + "timestamp": "2025-10-01 03:21:07.238372", + "step": 219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.273006", + "step": 219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08029156923294067, + "timestamp": "2025-10-01 03:21:07.299899", + "step": 220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:07.335552", + "step": 220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024638356640934944, + "timestamp": "2025-10-01 03:21:07.342395", + "step": 221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.378020", + "step": 221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04872479662299156, + "timestamp": "2025-10-01 03:21:07.383645", + "step": 222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.419259", + "step": 222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06075391173362732, + "timestamp": "2025-10-01 03:21:07.424551", + "step": 223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.458223", + "step": 223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05128519982099533, + "timestamp": "2025-10-01 03:21:07.487059", + "step": 224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:07.521304", + "step": 224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05521547049283981, + "timestamp": "2025-10-01 03:21:07.528060", + "step": 225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.565959", + "step": 225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027485044673085213, + "timestamp": "2025-10-01 03:21:07.572213", + "step": 226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:07.607348", + "step": 226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07571954280138016, + "timestamp": "2025-10-01 03:21:07.611522", + "step": 227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.645384", + "step": 227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040543098002672195, + "timestamp": "2025-10-01 03:21:07.672242", + "step": 228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:07.708334", + "step": 228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023556789383292198, + "timestamp": "2025-10-01 03:21:07.712506", + "step": 229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.744752", + "step": 229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051129166036844254, + "timestamp": "2025-10-01 03:21:07.749917", + "step": 230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.782186", + "step": 230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01715698093175888, + "timestamp": "2025-10-01 03:21:07.787298", + "step": 231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:07.820858", + "step": 231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019109785556793213, + "timestamp": "2025-10-01 03:21:07.847410", + "step": 232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.879402", + "step": 232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04312221333384514, + "timestamp": "2025-10-01 03:21:07.883193", + "step": 233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:07.917528", + "step": 233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040985845029354095, + "timestamp": "2025-10-01 03:21:07.923841", + "step": 234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:07.959461", + "step": 234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05949518829584122, + "timestamp": "2025-10-01 03:21:07.971187", + "step": 235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.014353", + "step": 235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011875673197209835, + "timestamp": "2025-10-01 03:21:08.038323", + "step": 236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:08.073542", + "step": 236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02449009381234646, + "timestamp": "2025-10-01 03:21:08.081234", + "step": 237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.117359", + "step": 237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01885351911187172, + "timestamp": "2025-10-01 03:21:08.123387", + "step": 238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.158883", + "step": 238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034421950578689575, + "timestamp": "2025-10-01 03:21:08.164372", + "step": 239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.200528", + "step": 239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026185203343629837, + "timestamp": "2025-10-01 03:21:08.229173", + "step": 240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.261162", + "step": 240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01076290849596262, + "timestamp": "2025-10-01 03:21:08.267333", + "step": 241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.303536", + "step": 241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05293193832039833, + "timestamp": "2025-10-01 03:21:08.307870", + "step": 242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:08.343066", + "step": 242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03448881953954697, + "timestamp": "2025-10-01 03:21:08.349187", + "step": 243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.392028", + "step": 243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020484697073698044, + "timestamp": "2025-10-01 03:21:08.419971", + "step": 244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.456762", + "step": 244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.060300562530756, + "timestamp": "2025-10-01 03:21:08.462864", + "step": 245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.500710", + "step": 245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042078692466020584, + "timestamp": "2025-10-01 03:21:08.503001", + "step": 246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.540195", + "step": 246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05121230706572533, + "timestamp": "2025-10-01 03:21:08.547294", + "step": 247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:08.593760", + "step": 247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032653141766786575, + "timestamp": "2025-10-01 03:21:08.621012", + "step": 248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.660012", + "step": 248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04026014357805252, + "timestamp": "2025-10-01 03:21:08.666772", + "step": 249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.703420", + "step": 249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033650875091552734, + "timestamp": "2025-10-01 03:21:08.709884", + "step": 250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:08.745243", + "step": 250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03580217808485031, + "timestamp": "2025-10-01 03:21:08.751638", + "step": 251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.786127", + "step": 251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.060095518827438354, + "timestamp": "2025-10-01 03:21:08.813646", + "step": 252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:08.849259", + "step": 252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0698780044913292, + "timestamp": "2025-10-01 03:21:08.856136", + "step": 253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.896402", + "step": 253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03791627660393715, + "timestamp": "2025-10-01 03:21:08.902454", + "step": 254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.936596", + "step": 254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030317185446619987, + "timestamp": "2025-10-01 03:21:08.942422", + "step": 255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:08.973726", + "step": 255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04531251639127731, + "timestamp": "2025-10-01 03:21:09.002429", + "step": 256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:09.036897", + "step": 256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040513776242733, + "timestamp": "2025-10-01 03:21:09.042780", + "step": 257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.077277", + "step": 257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04363056644797325, + "timestamp": "2025-10-01 03:21:09.079977", + "step": 258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.117544", + "step": 258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022667689248919487, + "timestamp": "2025-10-01 03:21:09.123462", + "step": 259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.159164", + "step": 259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03234158828854561, + "timestamp": "2025-10-01 03:21:09.186192", + "step": 260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.219869", + "step": 260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04938144236803055, + "timestamp": "2025-10-01 03:21:09.226279", + "step": 261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.261182", + "step": 261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025039618834853172, + "timestamp": "2025-10-01 03:21:09.266547", + "step": 262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.306219", + "step": 262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03265480697154999, + "timestamp": "2025-10-01 03:21:09.312942", + "step": 263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.346678", + "step": 263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038345880806446075, + "timestamp": "2025-10-01 03:21:09.374460", + "step": 264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.411399", + "step": 264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03315390273928642, + "timestamp": "2025-10-01 03:21:09.419160", + "step": 265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:09.456149", + "step": 265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012200199998915195, + "timestamp": "2025-10-01 03:21:09.466002", + "step": 266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.503819", + "step": 266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014039868488907814, + "timestamp": "2025-10-01 03:21:09.514298", + "step": 267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.552398", + "step": 267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02092992700636387, + "timestamp": "2025-10-01 03:21:09.582358", + "step": 268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.621187", + "step": 268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02060944028198719, + "timestamp": "2025-10-01 03:21:09.628928", + "step": 269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.666188", + "step": 269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06456822901964188, + "timestamp": "2025-10-01 03:21:09.677323", + "step": 270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.718180", + "step": 270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016088221222162247, + "timestamp": "2025-10-01 03:21:09.726745", + "step": 271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.764130", + "step": 271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04096638411283493, + "timestamp": "2025-10-01 03:21:09.796089", + "step": 272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:09.834834", + "step": 272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01842545159161091, + "timestamp": "2025-10-01 03:21:09.844054", + "step": 273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.873917", + "step": 273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0491507351398468, + "timestamp": "2025-10-01 03:21:09.882973", + "step": 274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:09.920825", + "step": 274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0486728772521019, + "timestamp": "2025-10-01 03:21:09.931685", + "step": 275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:09.970076", + "step": 275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01887432299554348, + "timestamp": "2025-10-01 03:21:10.001182", + "step": 276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:10.038704", + "step": 276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09505164623260498, + "timestamp": "2025-10-01 03:21:10.049458", + "step": 277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.086005", + "step": 277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02803880348801613, + "timestamp": "2025-10-01 03:21:10.095490", + "step": 278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.132575", + "step": 278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09221839159727097, + "timestamp": "2025-10-01 03:21:10.141765", + "step": 279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:10.180603", + "step": 279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04076153039932251, + "timestamp": "2025-10-01 03:21:10.211896", + "step": 280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.249181", + "step": 280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026530565693974495, + "timestamp": "2025-10-01 03:21:10.259119", + "step": 281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.297779", + "step": 281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.048008885234594345, + "timestamp": "2025-10-01 03:21:10.306185", + "step": 282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.344030", + "step": 282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025746818631887436, + "timestamp": "2025-10-01 03:21:10.354539", + "step": 283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.392463", + "step": 283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031080612912774086, + "timestamp": "2025-10-01 03:21:10.424881", + "step": 284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:10.462172", + "step": 284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02354593575000763, + "timestamp": "2025-10-01 03:21:10.471708", + "step": 285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.508645", + "step": 285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049186427146196365, + "timestamp": "2025-10-01 03:21:10.520251", + "step": 286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.557985", + "step": 286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00826980359852314, + "timestamp": "2025-10-01 03:21:10.567812", + "step": 287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.607468", + "step": 287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03570196405053139, + "timestamp": "2025-10-01 03:21:10.639111", + "step": 288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.679068", + "step": 288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035737454891204834, + "timestamp": "2025-10-01 03:21:10.688083", + "step": 289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.725907", + "step": 289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04640930891036987, + "timestamp": "2025-10-01 03:21:10.736417", + "step": 290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.774569", + "step": 290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040902506560087204, + "timestamp": "2025-10-01 03:21:10.785817", + "step": 291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.823782", + "step": 291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09198614209890366, + "timestamp": "2025-10-01 03:21:10.854429", + "step": 292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.887286", + "step": 292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06674636900424957, + "timestamp": "2025-10-01 03:21:10.897387", + "step": 293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:10.933947", + "step": 293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07198696583509445, + "timestamp": "2025-10-01 03:21:10.944925", + "step": 294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:10.982925", + "step": 294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05689097195863724, + "timestamp": "2025-10-01 03:21:10.993780", + "step": 295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.032622", + "step": 295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047646768391132355, + "timestamp": "2025-10-01 03:21:11.058668", + "step": 296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.097410", + "step": 296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05473044514656067, + "timestamp": "2025-10-01 03:21:11.106124", + "step": 297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.145442", + "step": 297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04441896453499794, + "timestamp": "2025-10-01 03:21:11.154477", + "step": 298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.192585", + "step": 298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041898664087057114, + "timestamp": "2025-10-01 03:21:11.200518", + "step": 299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.237977", + "step": 299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03288258984684944, + "timestamp": "2025-10-01 03:21:11.267067", + "step": 300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.305419", + "step": 300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029331600293517113, + "timestamp": "2025-10-01 03:21:11.315629", + "step": 301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.354289", + "step": 301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.053104352205991745, + "timestamp": "2025-10-01 03:21:11.362943", + "step": 302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.399662", + "step": 302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03217674791812897, + "timestamp": "2025-10-01 03:21:11.409547", + "step": 303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:11.445081", + "step": 303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02352527715265751, + "timestamp": "2025-10-01 03:21:11.475333", + "step": 304, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:21:13.755104", + "step": 304, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2635909.0997066367, + "timestamp": "2025-10-01 03:21:13.763785", + "step": 304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:13.800293", + "step": 304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06294579058885574, + "timestamp": "2025-10-01 03:21:13.811681", + "step": 305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:13.848666", + "step": 305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05934818461537361, + "timestamp": "2025-10-01 03:21:13.853190", + "step": 306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:13.884803", + "step": 306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025813262909650803, + "timestamp": "2025-10-01 03:21:13.891784", + "step": 307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:13.934519", + "step": 307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030826464295387268, + "timestamp": "2025-10-01 03:21:13.962312", + "step": 308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:14.001415", + "step": 308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030302438884973526, + "timestamp": "2025-10-01 03:21:14.007267", + "step": 309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.043121", + "step": 309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025672927498817444, + "timestamp": "2025-10-01 03:21:14.049315", + "step": 310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:14.084511", + "step": 310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04249703139066696, + "timestamp": "2025-10-01 03:21:14.090941", + "step": 311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.126676", + "step": 311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018733439967036247, + "timestamp": "2025-10-01 03:21:14.153736", + "step": 312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.188756", + "step": 312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0510123074054718, + "timestamp": "2025-10-01 03:21:14.196861", + "step": 313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:14.234728", + "step": 313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03964235261082649, + "timestamp": "2025-10-01 03:21:14.238582", + "step": 314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.270772", + "step": 314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013344081118702888, + "timestamp": "2025-10-01 03:21:14.275440", + "step": 315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.310687", + "step": 315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018109554424881935, + "timestamp": "2025-10-01 03:21:14.336541", + "step": 316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:14.379238", + "step": 316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025358552113175392, + "timestamp": "2025-10-01 03:21:14.383533", + "step": 317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.416037", + "step": 317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0389687716960907, + "timestamp": "2025-10-01 03:21:14.420349", + "step": 318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.452977", + "step": 318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026115987449884415, + "timestamp": "2025-10-01 03:21:14.457667", + "step": 319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.496511", + "step": 319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02937491610646248, + "timestamp": "2025-10-01 03:21:14.522650", + "step": 320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.556601", + "step": 320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04070219770073891, + "timestamp": "2025-10-01 03:21:14.560233", + "step": 321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:14.594879", + "step": 321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051409609615802765, + "timestamp": "2025-10-01 03:21:14.599288", + "step": 322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:14.633779", + "step": 322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020277127623558044, + "timestamp": "2025-10-01 03:21:14.638478", + "step": 323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.671001", + "step": 323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027128782123327255, + "timestamp": "2025-10-01 03:21:14.697836", + "step": 324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:14.734893", + "step": 324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0612175352871418, + "timestamp": "2025-10-01 03:21:14.741482", + "step": 325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.778779", + "step": 325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04738769307732582, + "timestamp": "2025-10-01 03:21:14.781098", + "step": 326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.819266", + "step": 326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021857798099517822, + "timestamp": "2025-10-01 03:21:14.823102", + "step": 327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.856122", + "step": 327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023989515379071236, + "timestamp": "2025-10-01 03:21:14.880139", + "step": 328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.913315", + "step": 328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.050543539226055145, + "timestamp": "2025-10-01 03:21:14.917810", + "step": 329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.953291", + "step": 329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03639816865324974, + "timestamp": "2025-10-01 03:21:14.957446", + "step": 330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:14.991532", + "step": 330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02381560020148754, + "timestamp": "2025-10-01 03:21:14.996496", + "step": 331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:15.036370", + "step": 331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06295320391654968, + "timestamp": "2025-10-01 03:21:15.063724", + "step": 332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.099774", + "step": 332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0250781811773777, + "timestamp": "2025-10-01 03:21:15.103112", + "step": 333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.135446", + "step": 333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.044613711535930634, + "timestamp": "2025-10-01 03:21:15.139302", + "step": 334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.174323", + "step": 334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03778538107872009, + "timestamp": "2025-10-01 03:21:15.178741", + "step": 335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.212821", + "step": 335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049594197422266006, + "timestamp": "2025-10-01 03:21:15.237572", + "step": 336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.271876", + "step": 336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05178217962384224, + "timestamp": "2025-10-01 03:21:15.275892", + "step": 337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.308285", + "step": 337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03824915364384651, + "timestamp": "2025-10-01 03:21:15.312747", + "step": 338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.346780", + "step": 338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05189288780093193, + "timestamp": "2025-10-01 03:21:15.351754", + "step": 339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:15.387765", + "step": 339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01864314079284668, + "timestamp": "2025-10-01 03:21:15.412748", + "step": 340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.445936", + "step": 340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010373003780841827, + "timestamp": "2025-10-01 03:21:15.449808", + "step": 341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.481201", + "step": 341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02990402840077877, + "timestamp": "2025-10-01 03:21:15.487667", + "step": 342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.522529", + "step": 342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047456271946430206, + "timestamp": "2025-10-01 03:21:15.527324", + "step": 343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.561792", + "step": 343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024180978536605835, + "timestamp": "2025-10-01 03:21:15.587279", + "step": 344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:15.620773", + "step": 344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030872391536831856, + "timestamp": "2025-10-01 03:21:15.624754", + "step": 345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.659673", + "step": 345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0424276664853096, + "timestamp": "2025-10-01 03:21:15.665006", + "step": 346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.704464", + "step": 346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03602544590830803, + "timestamp": "2025-10-01 03:21:15.708676", + "step": 347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.745192", + "step": 347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04484555125236511, + "timestamp": "2025-10-01 03:21:15.771003", + "step": 348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.809625", + "step": 348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010752486996352673, + "timestamp": "2025-10-01 03:21:15.814927", + "step": 349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.854195", + "step": 349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05483372136950493, + "timestamp": "2025-10-01 03:21:15.858048", + "step": 350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.894801", + "step": 350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052938614040613174, + "timestamp": "2025-10-01 03:21:15.898637", + "step": 351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.933075", + "step": 351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0372745655477047, + "timestamp": "2025-10-01 03:21:15.958397", + "step": 352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:15.996090", + "step": 352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045854467898607254, + "timestamp": "2025-10-01 03:21:15.999409", + "step": 353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.036376", + "step": 353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04441056400537491, + "timestamp": "2025-10-01 03:21:16.039833", + "step": 354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:16.072906", + "step": 354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03690371289849281, + "timestamp": "2025-10-01 03:21:16.076643", + "step": 355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.113061", + "step": 355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.057356756180524826, + "timestamp": "2025-10-01 03:21:16.137445", + "step": 356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.169863", + "step": 356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0411955900490284, + "timestamp": "2025-10-01 03:21:16.172626", + "step": 357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.204526", + "step": 357, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02961675263941288, + "timestamp": "2025-10-01 03:21:16.207865", + "step": 358, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.250378", + "step": 358, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0411757156252861, + "timestamp": "2025-10-01 03:21:16.253753", + "step": 359, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.290078", + "step": 359, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03133092448115349, + "timestamp": "2025-10-01 03:21:16.315435", + "step": 360, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.353651", + "step": 360, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02828999049961567, + "timestamp": "2025-10-01 03:21:16.357190", + "step": 361, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.391956", + "step": 361, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05366725102066994, + "timestamp": "2025-10-01 03:21:16.395890", + "step": 362, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:16.433134", + "step": 362, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046885523945093155, + "timestamp": "2025-10-01 03:21:16.435959", + "step": 363, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.469949", + "step": 363, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054073285311460495, + "timestamp": "2025-10-01 03:21:16.495036", + "step": 364, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.532261", + "step": 364, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031410034745931625, + "timestamp": "2025-10-01 03:21:16.535265", + "step": 365, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.571616", + "step": 365, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04697310924530029, + "timestamp": "2025-10-01 03:21:16.574589", + "step": 366, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.610512", + "step": 366, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03616165742278099, + "timestamp": "2025-10-01 03:21:16.622661", + "step": 367, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.657533", + "step": 367, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015805311501026154, + "timestamp": "2025-10-01 03:21:16.684309", + "step": 368, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:16.718241", + "step": 368, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025174805894494057, + "timestamp": "2025-10-01 03:21:16.723624", + "step": 369, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.796031", + "step": 369, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033946443349123, + "timestamp": "2025-10-01 03:21:16.798790", + "step": 370, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.831200", + "step": 370, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04043666273355484, + "timestamp": "2025-10-01 03:21:16.834792", + "step": 371, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.881051", + "step": 371, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024279937148094177, + "timestamp": "2025-10-01 03:21:16.905835", + "step": 372, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.942122", + "step": 372, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0068135918118059635, + "timestamp": "2025-10-01 03:21:16.945101", + "step": 373, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:16.978278", + "step": 373, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01405350212007761, + "timestamp": "2025-10-01 03:21:16.981068", + "step": 374, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.012448", + "step": 374, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016704119741916656, + "timestamp": "2025-10-01 03:21:17.015829", + "step": 375, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.047985", + "step": 375, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033454347401857376, + "timestamp": "2025-10-01 03:21:17.073042", + "step": 376, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.107448", + "step": 376, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01625843159854412, + "timestamp": "2025-10-01 03:21:17.111754", + "step": 377, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.148093", + "step": 377, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012884716503322124, + "timestamp": "2025-10-01 03:21:17.151443", + "step": 378, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.187292", + "step": 378, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03828751668334007, + "timestamp": "2025-10-01 03:21:17.190628", + "step": 379, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.227735", + "step": 379, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018071478232741356, + "timestamp": "2025-10-01 03:21:17.253258", + "step": 380, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.296806", + "step": 380, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01790255308151245, + "timestamp": "2025-10-01 03:21:17.299791", + "step": 381, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.334047", + "step": 381, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023014387115836143, + "timestamp": "2025-10-01 03:21:17.338415", + "step": 382, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.373046", + "step": 382, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03396828472614288, + "timestamp": "2025-10-01 03:21:17.376810", + "step": 383, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.410865", + "step": 383, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025865623727440834, + "timestamp": "2025-10-01 03:21:17.436185", + "step": 384, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:17.470729", + "step": 384, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05275615304708481, + "timestamp": "2025-10-01 03:21:17.473637", + "step": 385, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.508113", + "step": 385, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023788463324308395, + "timestamp": "2025-10-01 03:21:17.510462", + "step": 386, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.550969", + "step": 386, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06861090660095215, + "timestamp": "2025-10-01 03:21:17.553957", + "step": 387, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:17.588289", + "step": 387, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043219029903411865, + "timestamp": "2025-10-01 03:21:17.612771", + "step": 388, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:17.646050", + "step": 388, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008304628543555737, + "timestamp": "2025-10-01 03:21:17.648636", + "step": 389, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:17.684516", + "step": 389, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045079708099365234, + "timestamp": "2025-10-01 03:21:17.686562", + "step": 390, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.723113", + "step": 390, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.10254204273223877, + "timestamp": "2025-10-01 03:21:17.725144", + "step": 391, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.759274", + "step": 391, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012852299027144909, + "timestamp": "2025-10-01 03:21:17.783115", + "step": 392, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:17.818447", + "step": 392, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02198074944317341, + "timestamp": "2025-10-01 03:21:17.821428", + "step": 393, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:17.860269", + "step": 393, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008588139899075031, + "timestamp": "2025-10-01 03:21:17.862778", + "step": 394, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:17.900249", + "step": 394, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04287237673997879, + "timestamp": "2025-10-01 03:21:17.902565", + "step": 395, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.939470", + "step": 395, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02676987834274769, + "timestamp": "2025-10-01 03:21:17.963897", + "step": 396, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:17.996636", + "step": 396, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04253643751144409, + "timestamp": "2025-10-01 03:21:17.999155", + "step": 397, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.031030", + "step": 397, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03064507059752941, + "timestamp": "2025-10-01 03:21:18.033580", + "step": 398, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.064665", + "step": 398, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08572976291179657, + "timestamp": "2025-10-01 03:21:18.067202", + "step": 399, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.103625", + "step": 399, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.1025778129696846, + "timestamp": "2025-10-01 03:21:18.127377", + "step": 400, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.158315", + "step": 400, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013830804266035557, + "timestamp": "2025-10-01 03:21:18.160379", + "step": 401, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.193621", + "step": 401, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02776501141488552, + "timestamp": "2025-10-01 03:21:18.195761", + "step": 402, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.233332", + "step": 402, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037247609347105026, + "timestamp": "2025-10-01 03:21:18.235749", + "step": 403, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.271870", + "step": 403, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03189929202198982, + "timestamp": "2025-10-01 03:21:18.295359", + "step": 404, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:21:18.329763", + "step": 404, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03428572043776512, + "timestamp": "2025-10-01 03:21:18.331699", + "step": 405, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.365056", + "step": 405, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02759367786347866, + "timestamp": "2025-10-01 03:21:18.366998", + "step": 406, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.400787", + "step": 406, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03497834876179695, + "timestamp": "2025-10-01 03:21:18.403161", + "step": 407, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.447167", + "step": 407, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04175318405032158, + "timestamp": "2025-10-01 03:21:18.471022", + "step": 408, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.510999", + "step": 408, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026505162939429283, + "timestamp": "2025-10-01 03:21:18.515330", + "step": 409, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.550810", + "step": 409, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03705447167158127, + "timestamp": "2025-10-01 03:21:18.552718", + "step": 410, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:18.588017", + "step": 410, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04808199405670166, + "timestamp": "2025-10-01 03:21:18.590116", + "step": 411, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.629078", + "step": 411, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016238873824477196, + "timestamp": "2025-10-01 03:21:18.653020", + "step": 412, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:18.687764", + "step": 412, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04498560354113579, + "timestamp": "2025-10-01 03:21:18.689800", + "step": 413, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.727543", + "step": 413, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036529093980789185, + "timestamp": "2025-10-01 03:21:18.729451", + "step": 414, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:18.764411", + "step": 414, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03303220495581627, + "timestamp": "2025-10-01 03:21:18.766653", + "step": 415, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.803003", + "step": 415, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028147801756858826, + "timestamp": "2025-10-01 03:21:18.826577", + "step": 416, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.859186", + "step": 416, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0753483772277832, + "timestamp": "2025-10-01 03:21:18.861266", + "step": 417, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.901341", + "step": 417, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03088216856122017, + "timestamp": "2025-10-01 03:21:18.903296", + "step": 418, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.951143", + "step": 418, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018586669117212296, + "timestamp": "2025-10-01 03:21:18.954373", + "step": 419, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:18.994893", + "step": 419, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0707685798406601, + "timestamp": "2025-10-01 03:21:19.018605", + "step": 420, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:19.051613", + "step": 420, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01660551317036152, + "timestamp": "2025-10-01 03:21:19.053657", + "step": 421, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:19.096053", + "step": 421, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049211032688617706, + "timestamp": "2025-10-01 03:21:19.098152", + "step": 422, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.133053", + "step": 422, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043362535536289215, + "timestamp": "2025-10-01 03:21:19.136259", + "step": 423, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.169282", + "step": 423, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04026709869503975, + "timestamp": "2025-10-01 03:21:19.193284", + "step": 424, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.235188", + "step": 424, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06537783145904541, + "timestamp": "2025-10-01 03:21:19.237833", + "step": 425, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.271281", + "step": 425, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04740477725863457, + "timestamp": "2025-10-01 03:21:19.273839", + "step": 426, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:19.308688", + "step": 426, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04837663844227791, + "timestamp": "2025-10-01 03:21:19.311232", + "step": 427, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.347298", + "step": 427, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020609335973858833, + "timestamp": "2025-10-01 03:21:19.373384", + "step": 428, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.427405", + "step": 428, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015082841739058495, + "timestamp": "2025-10-01 03:21:19.434540", + "step": 429, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.487117", + "step": 429, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05668017640709877, + "timestamp": "2025-10-01 03:21:19.490854", + "step": 430, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.554709", + "step": 430, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009251327253878117, + "timestamp": "2025-10-01 03:21:19.565826", + "step": 431, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.626146", + "step": 431, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0162136722356081, + "timestamp": "2025-10-01 03:21:19.663843", + "step": 432, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.712506", + "step": 432, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04565972462296486, + "timestamp": "2025-10-01 03:21:19.718477", + "step": 433, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.768924", + "step": 433, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.057140082120895386, + "timestamp": "2025-10-01 03:21:19.777795", + "step": 434, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:19.826311", + "step": 434, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03130960091948509, + "timestamp": "2025-10-01 03:21:19.832779", + "step": 435, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.876085", + "step": 435, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03621784225106239, + "timestamp": "2025-10-01 03:21:19.904911", + "step": 436, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:19.961304", + "step": 436, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04880484193563461, + "timestamp": "2025-10-01 03:21:19.965978", + "step": 437, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.015615", + "step": 437, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041375961154699326, + "timestamp": "2025-10-01 03:21:20.025388", + "step": 438, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.080960", + "step": 438, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04179920628666878, + "timestamp": "2025-10-01 03:21:20.094085", + "step": 439, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.151215", + "step": 439, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019370565190911293, + "timestamp": "2025-10-01 03:21:20.180794", + "step": 440, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.226875", + "step": 440, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05180008336901665, + "timestamp": "2025-10-01 03:21:20.235812", + "step": 441, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.271655", + "step": 441, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018917690962553024, + "timestamp": "2025-10-01 03:21:20.278922", + "step": 442, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.318029", + "step": 442, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.059014346450567245, + "timestamp": "2025-10-01 03:21:20.324089", + "step": 443, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:20.367314", + "step": 443, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037232764065265656, + "timestamp": "2025-10-01 03:21:20.395516", + "step": 444, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.444628", + "step": 444, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054930996149778366, + "timestamp": "2025-10-01 03:21:20.451150", + "step": 445, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:20.505378", + "step": 445, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036230895668268204, + "timestamp": "2025-10-01 03:21:20.514465", + "step": 446, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.551414", + "step": 446, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029452670365571976, + "timestamp": "2025-10-01 03:21:20.561561", + "step": 447, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.600783", + "step": 447, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08241455256938934, + "timestamp": "2025-10-01 03:21:20.630024", + "step": 448, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:20.666623", + "step": 448, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03759985789656639, + "timestamp": "2025-10-01 03:21:20.671886", + "step": 449, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.718761", + "step": 449, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03170710802078247, + "timestamp": "2025-10-01 03:21:20.722324", + "step": 450, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.760436", + "step": 450, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012814481742680073, + "timestamp": "2025-10-01 03:21:20.765989", + "step": 451, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.809336", + "step": 451, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05082489922642708, + "timestamp": "2025-10-01 03:21:20.838674", + "step": 452, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.890921", + "step": 452, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04178005829453468, + "timestamp": "2025-10-01 03:21:20.896902", + "step": 453, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:20.938939", + "step": 453, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015659945085644722, + "timestamp": "2025-10-01 03:21:20.944077", + "step": 454, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:20.983641", + "step": 454, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02921818383038044, + "timestamp": "2025-10-01 03:21:20.990069", + "step": 455, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:21.037254", + "step": 455, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07830890268087387, + "timestamp": "2025-10-01 03:21:21.062512", + "step": 456, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:21:24.502801", + "step": 456, + "epoch": 1 + }, + { + "type": "pplx", + "content": 3111323.1105926586, + "timestamp": "2025-10-01 03:21:24.518850", + "step": 456, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:24.564703", + "step": 456, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015061045996844769, + "timestamp": "2025-10-01 03:21:24.579466", + "step": 457, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:24.628343", + "step": 457, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028863925486803055, + "timestamp": "2025-10-01 03:21:24.644363", + "step": 458, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:24.693252", + "step": 458, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03519580140709877, + "timestamp": "2025-10-01 03:21:24.708021", + "step": 459, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:24.752876", + "step": 459, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0378088541328907, + "timestamp": "2025-10-01 03:21:24.790013", + "step": 460, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:24.836823", + "step": 460, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027751965448260307, + "timestamp": "2025-10-01 03:21:24.850634", + "step": 461, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:24.904005", + "step": 461, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0237320214509964, + "timestamp": "2025-10-01 03:21:24.916735", + "step": 462, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:24.962708", + "step": 462, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031173599883913994, + "timestamp": "2025-10-01 03:21:24.976376", + "step": 463, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.025674", + "step": 463, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0238236915320158, + "timestamp": "2025-10-01 03:21:25.060733", + "step": 464, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.106966", + "step": 464, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08687359094619751, + "timestamp": "2025-10-01 03:21:25.120879", + "step": 465, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.168251", + "step": 465, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033912044018507004, + "timestamp": "2025-10-01 03:21:25.181937", + "step": 466, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.239411", + "step": 466, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0473497211933136, + "timestamp": "2025-10-01 03:21:25.250935", + "step": 467, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:25.302016", + "step": 467, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04163804277777672, + "timestamp": "2025-10-01 03:21:25.335809", + "step": 468, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.380099", + "step": 468, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02686198242008686, + "timestamp": "2025-10-01 03:21:25.391696", + "step": 469, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.441898", + "step": 469, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01851971447467804, + "timestamp": "2025-10-01 03:21:25.452641", + "step": 470, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.496075", + "step": 470, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04267337918281555, + "timestamp": "2025-10-01 03:21:25.516983", + "step": 471, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.564778", + "step": 471, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04307543486356735, + "timestamp": "2025-10-01 03:21:25.600357", + "step": 472, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.657986", + "step": 472, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015285887755453587, + "timestamp": "2025-10-01 03:21:25.670095", + "step": 473, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.711944", + "step": 473, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07309208065271378, + "timestamp": "2025-10-01 03:21:25.722069", + "step": 474, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.778855", + "step": 474, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02003297209739685, + "timestamp": "2025-10-01 03:21:25.792139", + "step": 475, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:25.840968", + "step": 475, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.044590312987565994, + "timestamp": "2025-10-01 03:21:25.867137", + "step": 476, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:25.916478", + "step": 476, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03039819560945034, + "timestamp": "2025-10-01 03:21:25.930477", + "step": 477, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:25.971779", + "step": 477, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04165015369653702, + "timestamp": "2025-10-01 03:21:25.986416", + "step": 478, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.030745", + "step": 478, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03164893016219139, + "timestamp": "2025-10-01 03:21:26.045444", + "step": 479, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:26.095933", + "step": 479, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054279081523418427, + "timestamp": "2025-10-01 03:21:26.127970", + "step": 480, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:26.171540", + "step": 480, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02784739062190056, + "timestamp": "2025-10-01 03:21:26.183745", + "step": 481, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.228009", + "step": 481, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05955734848976135, + "timestamp": "2025-10-01 03:21:26.240350", + "step": 482, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.291849", + "step": 482, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02169814333319664, + "timestamp": "2025-10-01 03:21:26.301575", + "step": 483, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:26.340251", + "step": 483, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05979861691594124, + "timestamp": "2025-10-01 03:21:26.372177", + "step": 484, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:26.417078", + "step": 484, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05913252755999565, + "timestamp": "2025-10-01 03:21:26.426947", + "step": 485, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.465300", + "step": 485, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0664086565375328, + "timestamp": "2025-10-01 03:21:26.480667", + "step": 486, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.526043", + "step": 486, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03568035736680031, + "timestamp": "2025-10-01 03:21:26.530189", + "step": 487, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:26.569880", + "step": 487, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02003902569413185, + "timestamp": "2025-10-01 03:21:26.596090", + "step": 488, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.645535", + "step": 488, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05694048851728439, + "timestamp": "2025-10-01 03:21:26.650120", + "step": 489, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.682790", + "step": 489, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020539311692118645, + "timestamp": "2025-10-01 03:21:26.694462", + "step": 490, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.747454", + "step": 490, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04576178640127182, + "timestamp": "2025-10-01 03:21:26.755379", + "step": 491, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:26.798200", + "step": 491, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06022220477461815, + "timestamp": "2025-10-01 03:21:26.829457", + "step": 492, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.871641", + "step": 492, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07608479261398315, + "timestamp": "2025-10-01 03:21:26.882276", + "step": 493, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.935315", + "step": 493, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05819416791200638, + "timestamp": "2025-10-01 03:21:26.945298", + "step": 494, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:26.994906", + "step": 494, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05386066064238548, + "timestamp": "2025-10-01 03:21:27.005823", + "step": 495, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:27.051297", + "step": 495, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024038270115852356, + "timestamp": "2025-10-01 03:21:27.083033", + "step": 496, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:27.132727", + "step": 496, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03350303694605827, + "timestamp": "2025-10-01 03:21:27.136151", + "step": 497, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:27.179476", + "step": 497, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03777259215712547, + "timestamp": "2025-10-01 03:21:27.189802", + "step": 498, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:27.232975", + "step": 498, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029502039775252342, + "timestamp": "2025-10-01 03:21:27.240721", + "step": 499, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:27.279792", + "step": 499, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02271374687552452, + "timestamp": "2025-10-01 03:21:27.311744", + "step": 500, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 500", + "timestamp": "2025-10-01 03:21:32.497095", + "step": 500, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:32.538689", + "step": 500, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014928178861737251, + "timestamp": "2025-10-01 03:21:32.546232", + "step": 501, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:32.597295", + "step": 501, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01906161941587925, + "timestamp": "2025-10-01 03:21:32.605787", + "step": 502, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:32.645043", + "step": 502, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05676817148923874, + "timestamp": "2025-10-01 03:21:32.658357", + "step": 503, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:32.703326", + "step": 503, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031594809144735336, + "timestamp": "2025-10-01 03:21:32.730038", + "step": 504, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:32.775105", + "step": 504, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03479335084557533, + "timestamp": "2025-10-01 03:21:32.785458", + "step": 505, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:32.832354", + "step": 505, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04054738208651543, + "timestamp": "2025-10-01 03:21:32.842632", + "step": 506, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:32.892285", + "step": 506, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03043782338500023, + "timestamp": "2025-10-01 03:21:32.899518", + "step": 507, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:32.949598", + "step": 507, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05286935344338417, + "timestamp": "2025-10-01 03:21:32.974756", + "step": 508, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.029407", + "step": 508, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02375977672636509, + "timestamp": "2025-10-01 03:21:33.035554", + "step": 509, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:33.080054", + "step": 509, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043177634477615356, + "timestamp": "2025-10-01 03:21:33.090765", + "step": 510, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:33.135408", + "step": 510, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041195809841156006, + "timestamp": "2025-10-01 03:21:33.139640", + "step": 511, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:33.195056", + "step": 511, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04013251140713692, + "timestamp": "2025-10-01 03:21:33.221000", + "step": 512, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.261965", + "step": 512, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046260468661785126, + "timestamp": "2025-10-01 03:21:33.266791", + "step": 513, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:33.311779", + "step": 513, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05250737443566322, + "timestamp": "2025-10-01 03:21:33.316479", + "step": 514, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.364603", + "step": 514, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02815081737935543, + "timestamp": "2025-10-01 03:21:33.370768", + "step": 515, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.416849", + "step": 515, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04927343502640724, + "timestamp": "2025-10-01 03:21:33.443485", + "step": 516, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.480777", + "step": 516, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0074361348524689674, + "timestamp": "2025-10-01 03:21:33.486430", + "step": 517, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.529962", + "step": 517, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03634947910904884, + "timestamp": "2025-10-01 03:21:33.538534", + "step": 518, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.575954", + "step": 518, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036744557321071625, + "timestamp": "2025-10-01 03:21:33.581089", + "step": 519, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.625087", + "step": 519, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030309390276670456, + "timestamp": "2025-10-01 03:21:33.651690", + "step": 520, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:33.691781", + "step": 520, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017226731404662132, + "timestamp": "2025-10-01 03:21:33.697484", + "step": 521, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.735074", + "step": 521, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02051245979964733, + "timestamp": "2025-10-01 03:21:33.741033", + "step": 522, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.778705", + "step": 522, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04403200373053551, + "timestamp": "2025-10-01 03:21:33.785875", + "step": 523, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.825029", + "step": 523, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027913589030504227, + "timestamp": "2025-10-01 03:21:33.854794", + "step": 524, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.897135", + "step": 524, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013581112027168274, + "timestamp": "2025-10-01 03:21:33.902699", + "step": 525, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:33.943951", + "step": 525, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07374077290296555, + "timestamp": "2025-10-01 03:21:33.952226", + "step": 526, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.000056", + "step": 526, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024956421926617622, + "timestamp": "2025-10-01 03:21:34.004681", + "step": 527, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.051603", + "step": 527, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028059016913175583, + "timestamp": "2025-10-01 03:21:34.081347", + "step": 528, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.121360", + "step": 528, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04789089411497116, + "timestamp": "2025-10-01 03:21:34.127110", + "step": 529, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:34.172101", + "step": 529, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06340678036212921, + "timestamp": "2025-10-01 03:21:34.174724", + "step": 530, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:34.220269", + "step": 530, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01330691296607256, + "timestamp": "2025-10-01 03:21:34.225757", + "step": 531, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.263334", + "step": 531, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041142452508211136, + "timestamp": "2025-10-01 03:21:34.288321", + "step": 532, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:34.333675", + "step": 532, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02665702812373638, + "timestamp": "2025-10-01 03:21:34.338200", + "step": 533, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.381299", + "step": 533, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05184217914938927, + "timestamp": "2025-10-01 03:21:34.385157", + "step": 534, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.427365", + "step": 534, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04334848001599312, + "timestamp": "2025-10-01 03:21:34.429469", + "step": 535, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.463838", + "step": 535, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040276531130075455, + "timestamp": "2025-10-01 03:21:34.487490", + "step": 536, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.523449", + "step": 536, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020284174010157585, + "timestamp": "2025-10-01 03:21:34.525428", + "step": 537, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.559105", + "step": 537, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02560587227344513, + "timestamp": "2025-10-01 03:21:34.560913", + "step": 538, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.605656", + "step": 538, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021989600732922554, + "timestamp": "2025-10-01 03:21:34.607755", + "step": 539, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.642546", + "step": 539, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02235238440334797, + "timestamp": "2025-10-01 03:21:34.666686", + "step": 540, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.708187", + "step": 540, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03856397792696953, + "timestamp": "2025-10-01 03:21:34.710207", + "step": 541, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:34.750755", + "step": 541, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08373469114303589, + "timestamp": "2025-10-01 03:21:34.752914", + "step": 542, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.786562", + "step": 542, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04572203755378723, + "timestamp": "2025-10-01 03:21:34.789251", + "step": 543, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.827724", + "step": 543, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04541454836726189, + "timestamp": "2025-10-01 03:21:34.851671", + "step": 544, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.892323", + "step": 544, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0561620332300663, + "timestamp": "2025-10-01 03:21:34.895371", + "step": 545, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.936188", + "step": 545, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029050296172499657, + "timestamp": "2025-10-01 03:21:34.938895", + "step": 546, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:34.980512", + "step": 546, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026993142440915108, + "timestamp": "2025-10-01 03:21:34.982700", + "step": 547, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.025331", + "step": 547, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018753143027424812, + "timestamp": "2025-10-01 03:21:35.048889", + "step": 548, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:35.080878", + "step": 548, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01741393469274044, + "timestamp": "2025-10-01 03:21:35.082966", + "step": 549, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.116555", + "step": 549, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025020167231559753, + "timestamp": "2025-10-01 03:21:35.118621", + "step": 550, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.150797", + "step": 550, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012994147837162018, + "timestamp": "2025-10-01 03:21:35.156368", + "step": 551, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.191875", + "step": 551, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02989545650780201, + "timestamp": "2025-10-01 03:21:35.215609", + "step": 552, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.258073", + "step": 552, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024913432076573372, + "timestamp": "2025-10-01 03:21:35.260479", + "step": 553, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.294099", + "step": 553, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05387217923998833, + "timestamp": "2025-10-01 03:21:35.296382", + "step": 554, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.328246", + "step": 554, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028850069269537926, + "timestamp": "2025-10-01 03:21:35.330267", + "step": 555, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.361151", + "step": 555, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020614445209503174, + "timestamp": "2025-10-01 03:21:35.384788", + "step": 556, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.417427", + "step": 556, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08055669814348221, + "timestamp": "2025-10-01 03:21:35.419763", + "step": 557, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.453970", + "step": 557, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025527318939566612, + "timestamp": "2025-10-01 03:21:35.456871", + "step": 558, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:35.487734", + "step": 558, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01047882717102766, + "timestamp": "2025-10-01 03:21:35.489761", + "step": 559, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:35.520225", + "step": 559, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04795101657509804, + "timestamp": "2025-10-01 03:21:35.543925", + "step": 560, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.574516", + "step": 560, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027170659974217415, + "timestamp": "2025-10-01 03:21:35.576514", + "step": 561, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.607126", + "step": 561, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042369544506073, + "timestamp": "2025-10-01 03:21:35.609393", + "step": 562, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:35.639942", + "step": 562, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06234573572874069, + "timestamp": "2025-10-01 03:21:35.643878", + "step": 563, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.681356", + "step": 563, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036130912601947784, + "timestamp": "2025-10-01 03:21:35.705149", + "step": 564, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:35.740184", + "step": 564, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038526467978954315, + "timestamp": "2025-10-01 03:21:35.742912", + "step": 565, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:35.773534", + "step": 565, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05825746804475784, + "timestamp": "2025-10-01 03:21:35.775752", + "step": 566, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.807997", + "step": 566, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023759828880429268, + "timestamp": "2025-10-01 03:21:35.810892", + "step": 567, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.847663", + "step": 567, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02941654995083809, + "timestamp": "2025-10-01 03:21:35.870847", + "step": 568, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:35.905916", + "step": 568, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01553614716976881, + "timestamp": "2025-10-01 03:21:35.908681", + "step": 569, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.945910", + "step": 569, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03455112501978874, + "timestamp": "2025-10-01 03:21:35.948632", + "step": 570, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:35.987273", + "step": 570, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03105643391609192, + "timestamp": "2025-10-01 03:21:35.989278", + "step": 571, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.030762", + "step": 571, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035467926412820816, + "timestamp": "2025-10-01 03:21:36.054263", + "step": 572, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.094769", + "step": 572, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05566677451133728, + "timestamp": "2025-10-01 03:21:36.097016", + "step": 573, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.138621", + "step": 573, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04212988540530205, + "timestamp": "2025-10-01 03:21:36.140910", + "step": 574, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.187874", + "step": 574, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013034653849899769, + "timestamp": "2025-10-01 03:21:36.189788", + "step": 575, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.237046", + "step": 575, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04342874139547348, + "timestamp": "2025-10-01 03:21:36.260591", + "step": 576, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.291520", + "step": 576, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03572627156972885, + "timestamp": "2025-10-01 03:21:36.293726", + "step": 577, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.327395", + "step": 577, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02475779689848423, + "timestamp": "2025-10-01 03:21:36.329628", + "step": 578, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.361503", + "step": 578, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009233533404767513, + "timestamp": "2025-10-01 03:21:36.363513", + "step": 579, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.397726", + "step": 579, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03786349669098854, + "timestamp": "2025-10-01 03:21:36.421378", + "step": 580, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:36.456785", + "step": 580, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025401417165994644, + "timestamp": "2025-10-01 03:21:36.458938", + "step": 581, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.494285", + "step": 581, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049032505601644516, + "timestamp": "2025-10-01 03:21:36.496545", + "step": 582, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.534791", + "step": 582, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039243705570697784, + "timestamp": "2025-10-01 03:21:36.536834", + "step": 583, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.569862", + "step": 583, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018580319359898567, + "timestamp": "2025-10-01 03:21:36.593569", + "step": 584, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:36.634232", + "step": 584, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028960874304175377, + "timestamp": "2025-10-01 03:21:36.636227", + "step": 585, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.678175", + "step": 585, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0104409484192729, + "timestamp": "2025-10-01 03:21:36.680260", + "step": 586, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.721767", + "step": 586, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04629410430788994, + "timestamp": "2025-10-01 03:21:36.723573", + "step": 587, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.767692", + "step": 587, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019163841381669044, + "timestamp": "2025-10-01 03:21:36.791065", + "step": 588, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:21:36.824758", + "step": 588, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03653392568230629, + "timestamp": "2025-10-01 03:21:36.826764", + "step": 589, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.867625", + "step": 589, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03066626563668251, + "timestamp": "2025-10-01 03:21:36.869639", + "step": 590, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.903330", + "step": 590, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02062402106821537, + "timestamp": "2025-10-01 03:21:36.905564", + "step": 591, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:36.941042", + "step": 591, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03568292409181595, + "timestamp": "2025-10-01 03:21:36.964749", + "step": 592, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.006092", + "step": 592, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028387462720274925, + "timestamp": "2025-10-01 03:21:37.008132", + "step": 593, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.040589", + "step": 593, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021938955411314964, + "timestamp": "2025-10-01 03:21:37.042601", + "step": 594, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.075630", + "step": 594, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023849431425333023, + "timestamp": "2025-10-01 03:21:37.077437", + "step": 595, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:37.119689", + "step": 595, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011990800499916077, + "timestamp": "2025-10-01 03:21:37.142986", + "step": 596, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.184516", + "step": 596, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049593787640333176, + "timestamp": "2025-10-01 03:21:37.186633", + "step": 597, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:37.218719", + "step": 597, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014813841320574284, + "timestamp": "2025-10-01 03:21:37.222179", + "step": 598, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.256747", + "step": 598, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016869474202394485, + "timestamp": "2025-10-01 03:21:37.258881", + "step": 599, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.295066", + "step": 599, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04964395985007286, + "timestamp": "2025-10-01 03:21:37.318804", + "step": 600, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:37.354041", + "step": 600, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04300464317202568, + "timestamp": "2025-10-01 03:21:37.358021", + "step": 601, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:37.393479", + "step": 601, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.050961945205926895, + "timestamp": "2025-10-01 03:21:37.395672", + "step": 602, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.428764", + "step": 602, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008434685878455639, + "timestamp": "2025-10-01 03:21:37.430348", + "step": 603, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.462420", + "step": 603, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03512928634881973, + "timestamp": "2025-10-01 03:21:37.486075", + "step": 604, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.518859", + "step": 604, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03029237501323223, + "timestamp": "2025-10-01 03:21:37.534035", + "step": 605, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:37.568049", + "step": 605, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034113217145204544, + "timestamp": "2025-10-01 03:21:37.570080", + "step": 606, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.604661", + "step": 606, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0076188803650438786, + "timestamp": "2025-10-01 03:21:37.606858", + "step": 607, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:37.638691", + "step": 607, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04519783705472946, + "timestamp": "2025-10-01 03:21:37.662839", + "step": 608, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:21:40.622256", + "step": 608, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2541369.71917743, + "timestamp": "2025-10-01 03:21:40.624687", + "step": 608, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:40.665612", + "step": 608, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07007786631584167, + "timestamp": "2025-10-01 03:21:40.668801", + "step": 609, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:40.712078", + "step": 609, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04201190546154976, + "timestamp": "2025-10-01 03:21:40.714171", + "step": 610, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:40.768274", + "step": 610, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016661809757351875, + "timestamp": "2025-10-01 03:21:40.770632", + "step": 611, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:40.826074", + "step": 611, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01168554462492466, + "timestamp": "2025-10-01 03:21:40.850778", + "step": 612, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:40.884644", + "step": 612, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03883763402700424, + "timestamp": "2025-10-01 03:21:40.886629", + "step": 613, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:40.930987", + "step": 613, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05044114589691162, + "timestamp": "2025-10-01 03:21:40.933185", + "step": 614, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:40.965829", + "step": 614, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010038613341748714, + "timestamp": "2025-10-01 03:21:40.968653", + "step": 615, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.003622", + "step": 615, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007206707261502743, + "timestamp": "2025-10-01 03:21:41.033315", + "step": 616, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.083518", + "step": 616, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04819563031196594, + "timestamp": "2025-10-01 03:21:41.085530", + "step": 617, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.131338", + "step": 617, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028435256332159042, + "timestamp": "2025-10-01 03:21:41.133857", + "step": 618, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.166848", + "step": 618, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02425406686961651, + "timestamp": "2025-10-01 03:21:41.169769", + "step": 619, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.217833", + "step": 619, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021303633227944374, + "timestamp": "2025-10-01 03:21:41.241479", + "step": 620, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:41.278450", + "step": 620, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00573085667565465, + "timestamp": "2025-10-01 03:21:41.280755", + "step": 621, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:41.313404", + "step": 621, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027593214064836502, + "timestamp": "2025-10-01 03:21:41.315342", + "step": 622, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:41.347683", + "step": 622, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04506673663854599, + "timestamp": "2025-10-01 03:21:41.349715", + "step": 623, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.391511", + "step": 623, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0538424551486969, + "timestamp": "2025-10-01 03:21:41.415453", + "step": 624, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.457290", + "step": 624, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06334152072668076, + "timestamp": "2025-10-01 03:21:41.459480", + "step": 625, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.497762", + "step": 625, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040743742138147354, + "timestamp": "2025-10-01 03:21:41.500052", + "step": 626, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.533569", + "step": 626, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034635093063116074, + "timestamp": "2025-10-01 03:21:41.535770", + "step": 627, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.572104", + "step": 627, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02068197727203369, + "timestamp": "2025-10-01 03:21:41.595661", + "step": 628, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.627837", + "step": 628, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023436304181814194, + "timestamp": "2025-10-01 03:21:41.629861", + "step": 629, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.668729", + "step": 629, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0471055768430233, + "timestamp": "2025-10-01 03:21:41.671154", + "step": 630, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.706946", + "step": 630, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043292704969644547, + "timestamp": "2025-10-01 03:21:41.709086", + "step": 631, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.750956", + "step": 631, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04712552949786186, + "timestamp": "2025-10-01 03:21:41.774512", + "step": 632, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.807093", + "step": 632, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012571332976222038, + "timestamp": "2025-10-01 03:21:41.809198", + "step": 633, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.842817", + "step": 633, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.050346244126558304, + "timestamp": "2025-10-01 03:21:41.844455", + "step": 634, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.877462", + "step": 634, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03459993749856949, + "timestamp": "2025-10-01 03:21:41.879335", + "step": 635, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.913118", + "step": 635, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028948253020644188, + "timestamp": "2025-10-01 03:21:41.936832", + "step": 636, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:41.970181", + "step": 636, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007621294353157282, + "timestamp": "2025-10-01 03:21:41.972318", + "step": 637, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.004152", + "step": 637, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03282740339636803, + "timestamp": "2025-10-01 03:21:42.006173", + "step": 638, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:42.039890", + "step": 638, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022979287430644035, + "timestamp": "2025-10-01 03:21:42.041958", + "step": 639, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.077644", + "step": 639, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038550663739442825, + "timestamp": "2025-10-01 03:21:42.101281", + "step": 640, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.134763", + "step": 640, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028354298323392868, + "timestamp": "2025-10-01 03:21:42.136798", + "step": 641, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.170285", + "step": 641, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01887590065598488, + "timestamp": "2025-10-01 03:21:42.172225", + "step": 642, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.207511", + "step": 642, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03413723036646843, + "timestamp": "2025-10-01 03:21:42.212208", + "step": 643, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.249531", + "step": 643, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05773584917187691, + "timestamp": "2025-10-01 03:21:42.287926", + "step": 644, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.333878", + "step": 644, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06682001799345016, + "timestamp": "2025-10-01 03:21:42.349761", + "step": 645, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:42.393082", + "step": 645, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027903417125344276, + "timestamp": "2025-10-01 03:21:42.397862", + "step": 646, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.433415", + "step": 646, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026236537843942642, + "timestamp": "2025-10-01 03:21:42.437531", + "step": 647, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.478485", + "step": 647, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0347476564347744, + "timestamp": "2025-10-01 03:21:42.504203", + "step": 648, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:42.537200", + "step": 648, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037924326956272125, + "timestamp": "2025-10-01 03:21:42.541411", + "step": 649, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.575669", + "step": 649, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051201097667217255, + "timestamp": "2025-10-01 03:21:42.579969", + "step": 650, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.629286", + "step": 650, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007328324485570192, + "timestamp": "2025-10-01 03:21:42.643902", + "step": 651, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:42.694184", + "step": 651, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017843512818217278, + "timestamp": "2025-10-01 03:21:42.729620", + "step": 652, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.772799", + "step": 652, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012259244918823242, + "timestamp": "2025-10-01 03:21:42.777011", + "step": 653, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.822980", + "step": 653, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.062486980110406876, + "timestamp": "2025-10-01 03:21:42.826925", + "step": 654, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.874289", + "step": 654, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04318806529045105, + "timestamp": "2025-10-01 03:21:42.889956", + "step": 655, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:42.926928", + "step": 655, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04440777748823166, + "timestamp": "2025-10-01 03:21:42.964471", + "step": 656, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.012951", + "step": 656, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05534902960062027, + "timestamp": "2025-10-01 03:21:43.023077", + "step": 657, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:43.064854", + "step": 657, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054159101098775864, + "timestamp": "2025-10-01 03:21:43.074191", + "step": 658, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.126226", + "step": 658, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029482515528798103, + "timestamp": "2025-10-01 03:21:43.136245", + "step": 659, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.175640", + "step": 659, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02967374585568905, + "timestamp": "2025-10-01 03:21:43.207480", + "step": 660, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.247924", + "step": 660, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043990444391965866, + "timestamp": "2025-10-01 03:21:43.251038", + "step": 661, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.286605", + "step": 661, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024344902485609055, + "timestamp": "2025-10-01 03:21:43.291344", + "step": 662, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.339128", + "step": 662, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05591925233602524, + "timestamp": "2025-10-01 03:21:43.347630", + "step": 663, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.395580", + "step": 663, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009596110321581364, + "timestamp": "2025-10-01 03:21:43.426958", + "step": 664, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.469012", + "step": 664, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01488166581839323, + "timestamp": "2025-10-01 03:21:43.478550", + "step": 665, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:43.519574", + "step": 665, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014620467089116573, + "timestamp": "2025-10-01 03:21:43.526912", + "step": 666, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.570967", + "step": 666, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03856545314192772, + "timestamp": "2025-10-01 03:21:43.576087", + "step": 667, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:43.617946", + "step": 667, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02359985001385212, + "timestamp": "2025-10-01 03:21:43.646911", + "step": 668, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.686447", + "step": 668, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03363505005836487, + "timestamp": "2025-10-01 03:21:43.694646", + "step": 669, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.731115", + "step": 669, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025890672579407692, + "timestamp": "2025-10-01 03:21:43.751984", + "step": 670, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.792287", + "step": 670, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03349059447646141, + "timestamp": "2025-10-01 03:21:43.802522", + "step": 671, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.845989", + "step": 671, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007964621298015118, + "timestamp": "2025-10-01 03:21:43.877649", + "step": 672, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.917934", + "step": 672, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024899518117308617, + "timestamp": "2025-10-01 03:21:43.928146", + "step": 673, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:43.975117", + "step": 673, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049413569271564484, + "timestamp": "2025-10-01 03:21:43.987619", + "step": 674, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.034691", + "step": 674, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007244005799293518, + "timestamp": "2025-10-01 03:21:44.038637", + "step": 675, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:44.083599", + "step": 675, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04878580942749977, + "timestamp": "2025-10-01 03:21:44.109211", + "step": 676, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.152221", + "step": 676, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06714563816785812, + "timestamp": "2025-10-01 03:21:44.161246", + "step": 677, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.202933", + "step": 677, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06267175823450089, + "timestamp": "2025-10-01 03:21:44.213055", + "step": 678, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.252144", + "step": 678, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037257373332977295, + "timestamp": "2025-10-01 03:21:44.262765", + "step": 679, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.307314", + "step": 679, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02199205569922924, + "timestamp": "2025-10-01 03:21:44.336974", + "step": 680, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.381834", + "step": 680, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011248961091041565, + "timestamp": "2025-10-01 03:21:44.391638", + "step": 681, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.436893", + "step": 681, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022939244285225868, + "timestamp": "2025-10-01 03:21:44.440646", + "step": 682, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.488293", + "step": 682, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022588664665818214, + "timestamp": "2025-10-01 03:21:44.500389", + "step": 683, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.543150", + "step": 683, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016338078305125237, + "timestamp": "2025-10-01 03:21:44.574796", + "step": 684, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.615472", + "step": 684, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011315934360027313, + "timestamp": "2025-10-01 03:21:44.623151", + "step": 685, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.671149", + "step": 685, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03541107103228569, + "timestamp": "2025-10-01 03:21:44.673503", + "step": 686, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.719215", + "step": 686, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03578633815050125, + "timestamp": "2025-10-01 03:21:44.722674", + "step": 687, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.760654", + "step": 687, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024129759520292282, + "timestamp": "2025-10-01 03:21:44.786771", + "step": 688, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.830461", + "step": 688, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03804786130785942, + "timestamp": "2025-10-01 03:21:44.841874", + "step": 689, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.886585", + "step": 689, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020790403708815575, + "timestamp": "2025-10-01 03:21:44.896871", + "step": 690, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.934497", + "step": 690, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06722570210695267, + "timestamp": "2025-10-01 03:21:44.947028", + "step": 691, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:44.985749", + "step": 691, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09219562262296677, + "timestamp": "2025-10-01 03:21:45.018064", + "step": 692, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.065692", + "step": 692, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03260277956724167, + "timestamp": "2025-10-01 03:21:45.074094", + "step": 693, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:45.112942", + "step": 693, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0350026860833168, + "timestamp": "2025-10-01 03:21:45.124665", + "step": 694, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.177737", + "step": 694, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022045865654945374, + "timestamp": "2025-10-01 03:21:45.185554", + "step": 695, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.231576", + "step": 695, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024015365168452263, + "timestamp": "2025-10-01 03:21:45.265825", + "step": 696, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.313657", + "step": 696, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039311740547418594, + "timestamp": "2025-10-01 03:21:45.318343", + "step": 697, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.375113", + "step": 697, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0236066821962595, + "timestamp": "2025-10-01 03:21:45.385072", + "step": 698, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.442619", + "step": 698, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028794782236218452, + "timestamp": "2025-10-01 03:21:45.450857", + "step": 699, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.494002", + "step": 699, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021925073117017746, + "timestamp": "2025-10-01 03:21:45.525729", + "step": 700, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.565903", + "step": 700, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01671312376856804, + "timestamp": "2025-10-01 03:21:45.576299", + "step": 701, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:45.613653", + "step": 701, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051827363669872284, + "timestamp": "2025-10-01 03:21:45.627095", + "step": 702, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:45.670593", + "step": 702, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03932129964232445, + "timestamp": "2025-10-01 03:21:45.680941", + "step": 703, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:45.726859", + "step": 703, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034455861896276474, + "timestamp": "2025-10-01 03:21:45.754066", + "step": 704, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.805691", + "step": 704, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00795345101505518, + "timestamp": "2025-10-01 03:21:45.815574", + "step": 705, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.862573", + "step": 705, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033563874661922455, + "timestamp": "2025-10-01 03:21:45.874153", + "step": 706, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.927284", + "step": 706, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032824717462062836, + "timestamp": "2025-10-01 03:21:45.930682", + "step": 707, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:45.973537", + "step": 707, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018788831308484077, + "timestamp": "2025-10-01 03:21:46.005317", + "step": 708, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:46.058044", + "step": 708, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034590594470500946, + "timestamp": "2025-10-01 03:21:46.066198", + "step": 709, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.116715", + "step": 709, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024111924692988396, + "timestamp": "2025-10-01 03:21:46.127778", + "step": 710, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.171899", + "step": 710, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03576008230447769, + "timestamp": "2025-10-01 03:21:46.182766", + "step": 711, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.234784", + "step": 711, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0338473841547966, + "timestamp": "2025-10-01 03:21:46.265916", + "step": 712, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.305140", + "step": 712, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06841777265071869, + "timestamp": "2025-10-01 03:21:46.317025", + "step": 713, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.356007", + "step": 713, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02218368463218212, + "timestamp": "2025-10-01 03:21:46.367779", + "step": 714, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:46.413566", + "step": 714, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024123309180140495, + "timestamp": "2025-10-01 03:21:46.425087", + "step": 715, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.470505", + "step": 715, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013394827954471111, + "timestamp": "2025-10-01 03:21:46.502494", + "step": 716, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.546356", + "step": 716, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037139732390642166, + "timestamp": "2025-10-01 03:21:46.554719", + "step": 717, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.613201", + "step": 717, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03574772924184799, + "timestamp": "2025-10-01 03:21:46.624835", + "step": 718, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.668195", + "step": 718, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014042341150343418, + "timestamp": "2025-10-01 03:21:46.677957", + "step": 719, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:46.722247", + "step": 719, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03130093216896057, + "timestamp": "2025-10-01 03:21:46.755497", + "step": 720, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:46.800868", + "step": 720, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05544882267713547, + "timestamp": "2025-10-01 03:21:46.810367", + "step": 721, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:46.858571", + "step": 721, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04404587671160698, + "timestamp": "2025-10-01 03:21:46.875904", + "step": 722, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:46.933762", + "step": 722, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030728237703442574, + "timestamp": "2025-10-01 03:21:46.944456", + "step": 723, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.000925", + "step": 723, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007378323469310999, + "timestamp": "2025-10-01 03:21:47.034409", + "step": 724, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.082256", + "step": 724, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035090792924165726, + "timestamp": "2025-10-01 03:21:47.094111", + "step": 725, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.139068", + "step": 725, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03585323691368103, + "timestamp": "2025-10-01 03:21:47.153011", + "step": 726, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.203987", + "step": 726, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04319249466061592, + "timestamp": "2025-10-01 03:21:47.214607", + "step": 727, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.282731", + "step": 727, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05388307198882103, + "timestamp": "2025-10-01 03:21:47.315404", + "step": 728, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.362631", + "step": 728, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047599442303180695, + "timestamp": "2025-10-01 03:21:47.373310", + "step": 729, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.418816", + "step": 729, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028371652588248253, + "timestamp": "2025-10-01 03:21:47.431704", + "step": 730, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.473142", + "step": 730, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02604098804295063, + "timestamp": "2025-10-01 03:21:47.485584", + "step": 731, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.529620", + "step": 731, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03402930125594139, + "timestamp": "2025-10-01 03:21:47.562947", + "step": 732, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.610146", + "step": 732, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03605136275291443, + "timestamp": "2025-10-01 03:21:47.615152", + "step": 733, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.658995", + "step": 733, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011475891806185246, + "timestamp": "2025-10-01 03:21:47.662325", + "step": 734, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.704697", + "step": 734, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027253905311226845, + "timestamp": "2025-10-01 03:21:47.708256", + "step": 735, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.749875", + "step": 735, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04707350581884384, + "timestamp": "2025-10-01 03:21:47.780438", + "step": 736, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.824273", + "step": 736, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021411219611763954, + "timestamp": "2025-10-01 03:21:47.827189", + "step": 737, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:47.871506", + "step": 737, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016985327005386353, + "timestamp": "2025-10-01 03:21:47.881554", + "step": 738, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.926543", + "step": 738, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02240918204188347, + "timestamp": "2025-10-01 03:21:47.929979", + "step": 739, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:47.977282", + "step": 739, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014935826882719994, + "timestamp": "2025-10-01 03:21:48.002756", + "step": 740, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:48.049043", + "step": 740, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04613146185874939, + "timestamp": "2025-10-01 03:21:48.058837", + "step": 741, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.102880", + "step": 741, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05383765324950218, + "timestamp": "2025-10-01 03:21:48.107337", + "step": 742, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.152112", + "step": 742, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038332524709403515, + "timestamp": "2025-10-01 03:21:48.164747", + "step": 743, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.201105", + "step": 743, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031183723360300064, + "timestamp": "2025-10-01 03:21:48.226169", + "step": 744, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.275176", + "step": 744, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07909000664949417, + "timestamp": "2025-10-01 03:21:48.285169", + "step": 745, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.336905", + "step": 745, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016779359430074692, + "timestamp": "2025-10-01 03:21:48.353608", + "step": 746, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.400640", + "step": 746, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02395823784172535, + "timestamp": "2025-10-01 03:21:48.407078", + "step": 747, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.451891", + "step": 747, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0414930023252964, + "timestamp": "2025-10-01 03:21:48.485413", + "step": 748, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.535126", + "step": 748, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030017124488949776, + "timestamp": "2025-10-01 03:21:48.548757", + "step": 749, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:48.605369", + "step": 749, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04018337279558182, + "timestamp": "2025-10-01 03:21:48.608359", + "step": 750, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.662003", + "step": 750, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07084427028894424, + "timestamp": "2025-10-01 03:21:48.673473", + "step": 751, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.713686", + "step": 751, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015048452652990818, + "timestamp": "2025-10-01 03:21:48.745065", + "step": 752, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.791799", + "step": 752, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016291674226522446, + "timestamp": "2025-10-01 03:21:48.804088", + "step": 753, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.848350", + "step": 753, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03424728289246559, + "timestamp": "2025-10-01 03:21:48.861581", + "step": 754, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.908460", + "step": 754, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04738408699631691, + "timestamp": "2025-10-01 03:21:48.921220", + "step": 755, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:48.974624", + "step": 755, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0315813384950161, + "timestamp": "2025-10-01 03:21:49.008485", + "step": 756, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:49.055141", + "step": 756, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026354128494858742, + "timestamp": "2025-10-01 03:21:49.068106", + "step": 757, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:49.119322", + "step": 757, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0709729865193367, + "timestamp": "2025-10-01 03:21:49.132111", + "step": 758, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:49.181918", + "step": 758, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009219320490956306, + "timestamp": "2025-10-01 03:21:49.196823", + "step": 759, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:49.241189", + "step": 759, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06331274658441544, + "timestamp": "2025-10-01 03:21:49.274446", + "step": 760, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:21:52.355525", + "step": 760, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2774937.64063532, + "timestamp": "2025-10-01 03:21:52.359428", + "step": 760, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.393393", + "step": 760, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019587935879826546, + "timestamp": "2025-10-01 03:21:52.397870", + "step": 761, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.438510", + "step": 761, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025472445413470268, + "timestamp": "2025-10-01 03:21:52.441428", + "step": 762, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:52.482762", + "step": 762, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05226452276110649, + "timestamp": "2025-10-01 03:21:52.487475", + "step": 763, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.526289", + "step": 763, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08228765428066254, + "timestamp": "2025-10-01 03:21:52.552398", + "step": 764, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.595203", + "step": 764, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03833889588713646, + "timestamp": "2025-10-01 03:21:52.600685", + "step": 765, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.644225", + "step": 765, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04552620276808739, + "timestamp": "2025-10-01 03:21:52.648144", + "step": 766, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.682616", + "step": 766, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03656487911939621, + "timestamp": "2025-10-01 03:21:52.686073", + "step": 767, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.744223", + "step": 767, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024020982906222343, + "timestamp": "2025-10-01 03:21:52.769737", + "step": 768, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.808150", + "step": 768, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014864616096019745, + "timestamp": "2025-10-01 03:21:52.811512", + "step": 769, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.848335", + "step": 769, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03756174445152283, + "timestamp": "2025-10-01 03:21:52.854028", + "step": 770, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.892765", + "step": 770, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00766287324950099, + "timestamp": "2025-10-01 03:21:52.896899", + "step": 771, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:52.933600", + "step": 771, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046369727700948715, + "timestamp": "2025-10-01 03:21:52.959726", + "step": 772, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.013296", + "step": 772, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025101762264966965, + "timestamp": "2025-10-01 03:21:53.029749", + "step": 773, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.079141", + "step": 773, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036066241562366486, + "timestamp": "2025-10-01 03:21:53.094813", + "step": 774, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.147668", + "step": 774, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01662353053689003, + "timestamp": "2025-10-01 03:21:53.161392", + "step": 775, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.217396", + "step": 775, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009258442558348179, + "timestamp": "2025-10-01 03:21:53.254045", + "step": 776, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.301167", + "step": 776, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015035935677587986, + "timestamp": "2025-10-01 03:21:53.315966", + "step": 777, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:53.362841", + "step": 777, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024054789915680885, + "timestamp": "2025-10-01 03:21:53.377966", + "step": 778, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.426274", + "step": 778, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0844641700387001, + "timestamp": "2025-10-01 03:21:53.432753", + "step": 779, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.469094", + "step": 779, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015576533041894436, + "timestamp": "2025-10-01 03:21:53.508869", + "step": 780, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.549947", + "step": 780, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04124925285577774, + "timestamp": "2025-10-01 03:21:53.555309", + "step": 781, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:53.590698", + "step": 781, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04404458403587341, + "timestamp": "2025-10-01 03:21:53.604482", + "step": 782, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.651148", + "step": 782, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033586010336875916, + "timestamp": "2025-10-01 03:21:53.663837", + "step": 783, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.710780", + "step": 783, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012660312466323376, + "timestamp": "2025-10-01 03:21:53.746601", + "step": 784, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.795156", + "step": 784, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014965550042688847, + "timestamp": "2025-10-01 03:21:53.810411", + "step": 785, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.856549", + "step": 785, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02133713662624359, + "timestamp": "2025-10-01 03:21:53.869596", + "step": 786, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.927237", + "step": 786, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03922215849161148, + "timestamp": "2025-10-01 03:21:53.940749", + "step": 787, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:53.986023", + "step": 787, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03219889849424362, + "timestamp": "2025-10-01 03:21:54.022254", + "step": 788, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.068637", + "step": 788, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02264563925564289, + "timestamp": "2025-10-01 03:21:54.083765", + "step": 789, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.139517", + "step": 789, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016186529770493507, + "timestamp": "2025-10-01 03:21:54.155100", + "step": 790, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.201112", + "step": 790, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04875059798359871, + "timestamp": "2025-10-01 03:21:54.213217", + "step": 791, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:54.270520", + "step": 791, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03940239176154137, + "timestamp": "2025-10-01 03:21:54.303495", + "step": 792, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:54.357797", + "step": 792, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04800339415669441, + "timestamp": "2025-10-01 03:21:54.370862", + "step": 793, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.419314", + "step": 793, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04594007134437561, + "timestamp": "2025-10-01 03:21:54.432262", + "step": 794, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.477288", + "step": 794, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008175364695489407, + "timestamp": "2025-10-01 03:21:54.490546", + "step": 795, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.546709", + "step": 795, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0122317960485816, + "timestamp": "2025-10-01 03:21:54.581892", + "step": 796, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:54.628179", + "step": 796, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02557474933564663, + "timestamp": "2025-10-01 03:21:54.643458", + "step": 797, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.687255", + "step": 797, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03395412489771843, + "timestamp": "2025-10-01 03:21:54.698856", + "step": 798, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:54.743698", + "step": 798, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01125849224627018, + "timestamp": "2025-10-01 03:21:54.758041", + "step": 799, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.814727", + "step": 799, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015385523438453674, + "timestamp": "2025-10-01 03:21:54.849781", + "step": 800, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.893889", + "step": 800, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005727329291403294, + "timestamp": "2025-10-01 03:21:54.907413", + "step": 801, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:54.950251", + "step": 801, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008332043886184692, + "timestamp": "2025-10-01 03:21:54.963486", + "step": 802, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:55.015272", + "step": 802, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026724591851234436, + "timestamp": "2025-10-01 03:21:55.019465", + "step": 803, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.065508", + "step": 803, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052558425813913345, + "timestamp": "2025-10-01 03:21:55.100599", + "step": 804, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.146269", + "step": 804, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014798680320382118, + "timestamp": "2025-10-01 03:21:55.161000", + "step": 805, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:55.210011", + "step": 805, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026463715359568596, + "timestamp": "2025-10-01 03:21:55.223337", + "step": 806, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.277433", + "step": 806, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023841235786676407, + "timestamp": "2025-10-01 03:21:55.292882", + "step": 807, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.341352", + "step": 807, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05624144896864891, + "timestamp": "2025-10-01 03:21:55.376148", + "step": 808, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.414444", + "step": 808, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04297583922743797, + "timestamp": "2025-10-01 03:21:55.427412", + "step": 809, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.472395", + "step": 809, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010814917273819447, + "timestamp": "2025-10-01 03:21:55.486309", + "step": 810, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.529974", + "step": 810, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03532738983631134, + "timestamp": "2025-10-01 03:21:55.541692", + "step": 811, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.578110", + "step": 811, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02476157806813717, + "timestamp": "2025-10-01 03:21:55.611715", + "step": 812, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.665950", + "step": 812, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023431552574038506, + "timestamp": "2025-10-01 03:21:55.679629", + "step": 813, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.716337", + "step": 813, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027339961379766464, + "timestamp": "2025-10-01 03:21:55.731032", + "step": 814, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.775922", + "step": 814, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0052986303344368935, + "timestamp": "2025-10-01 03:21:55.790688", + "step": 815, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:55.843242", + "step": 815, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0538351908326149, + "timestamp": "2025-10-01 03:21:55.879495", + "step": 816, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.925917", + "step": 816, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04940848425030708, + "timestamp": "2025-10-01 03:21:55.937967", + "step": 817, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:55.983664", + "step": 817, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01785695180296898, + "timestamp": "2025-10-01 03:21:55.996680", + "step": 818, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.044152", + "step": 818, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012845322489738464, + "timestamp": "2025-10-01 03:21:56.059362", + "step": 819, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.106771", + "step": 819, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012837870046496391, + "timestamp": "2025-10-01 03:21:56.142847", + "step": 820, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.198070", + "step": 820, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03694736957550049, + "timestamp": "2025-10-01 03:21:56.210384", + "step": 821, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:56.256897", + "step": 821, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03933262079954147, + "timestamp": "2025-10-01 03:21:56.269856", + "step": 822, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.315679", + "step": 822, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030122834723442793, + "timestamp": "2025-10-01 03:21:56.329117", + "step": 823, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.373954", + "step": 823, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027230065315961838, + "timestamp": "2025-10-01 03:21:56.400180", + "step": 824, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.447445", + "step": 824, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03307868540287018, + "timestamp": "2025-10-01 03:21:56.459443", + "step": 825, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:56.503650", + "step": 825, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04079753905534744, + "timestamp": "2025-10-01 03:21:56.517215", + "step": 826, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.572701", + "step": 826, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02190123125910759, + "timestamp": "2025-10-01 03:21:56.587909", + "step": 827, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:56.632638", + "step": 827, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00800885446369648, + "timestamp": "2025-10-01 03:21:56.667518", + "step": 828, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:56.713240", + "step": 828, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.048125382512807846, + "timestamp": "2025-10-01 03:21:56.728112", + "step": 829, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.787028", + "step": 829, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06986787170171738, + "timestamp": "2025-10-01 03:21:56.800174", + "step": 830, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.845771", + "step": 830, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049263425171375275, + "timestamp": "2025-10-01 03:21:56.861095", + "step": 831, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:56.904843", + "step": 831, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03234516456723213, + "timestamp": "2025-10-01 03:21:56.939384", + "step": 832, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:56.993322", + "step": 832, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.061128947883844376, + "timestamp": "2025-10-01 03:21:56.998761", + "step": 833, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.034444", + "step": 833, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033647868782281876, + "timestamp": "2025-10-01 03:21:57.049270", + "step": 834, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.096776", + "step": 834, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05079019069671631, + "timestamp": "2025-10-01 03:21:57.108748", + "step": 835, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.153387", + "step": 835, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07487256079912186, + "timestamp": "2025-10-01 03:21:57.179292", + "step": 836, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.224005", + "step": 836, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029349613934755325, + "timestamp": "2025-10-01 03:21:57.236766", + "step": 837, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.283040", + "step": 837, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013662203215062618, + "timestamp": "2025-10-01 03:21:57.296485", + "step": 838, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:57.340130", + "step": 838, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07457327097654343, + "timestamp": "2025-10-01 03:21:57.353505", + "step": 839, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.406643", + "step": 839, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007598718162626028, + "timestamp": "2025-10-01 03:21:57.442447", + "step": 840, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.486321", + "step": 840, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02800445258617401, + "timestamp": "2025-10-01 03:21:57.498133", + "step": 841, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.539325", + "step": 841, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026691157836467028, + "timestamp": "2025-10-01 03:21:57.552997", + "step": 842, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.605464", + "step": 842, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005438469350337982, + "timestamp": "2025-10-01 03:21:57.617664", + "step": 843, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.667537", + "step": 843, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04013844579458237, + "timestamp": "2025-10-01 03:21:57.694594", + "step": 844, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:21:57.738917", + "step": 844, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013419387862086296, + "timestamp": "2025-10-01 03:21:57.751217", + "step": 845, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.795366", + "step": 845, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006159189622849226, + "timestamp": "2025-10-01 03:21:57.806353", + "step": 846, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.852000", + "step": 846, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024982837960124016, + "timestamp": "2025-10-01 03:21:57.866077", + "step": 847, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.919015", + "step": 847, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010965168476104736, + "timestamp": "2025-10-01 03:21:57.951944", + "step": 848, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:57.991358", + "step": 848, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024024737998843193, + "timestamp": "2025-10-01 03:21:58.005167", + "step": 849, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.059993", + "step": 849, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04967940226197243, + "timestamp": "2025-10-01 03:21:58.072203", + "step": 850, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.127395", + "step": 850, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002219116548076272, + "timestamp": "2025-10-01 03:21:58.138306", + "step": 851, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.192342", + "step": 851, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02837294340133667, + "timestamp": "2025-10-01 03:21:58.226508", + "step": 852, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:58.277614", + "step": 852, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04345163702964783, + "timestamp": "2025-10-01 03:21:58.290347", + "step": 853, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.332158", + "step": 853, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018042275682091713, + "timestamp": "2025-10-01 03:21:58.337251", + "step": 854, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.372979", + "step": 854, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03000424988567829, + "timestamp": "2025-10-01 03:21:58.387722", + "step": 855, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.429877", + "step": 855, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012643185444176197, + "timestamp": "2025-10-01 03:21:58.462645", + "step": 856, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:58.507019", + "step": 856, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016716409474611282, + "timestamp": "2025-10-01 03:21:58.519406", + "step": 857, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.559759", + "step": 857, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04000121355056763, + "timestamp": "2025-10-01 03:21:58.572066", + "step": 858, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.613446", + "step": 858, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039873480796813965, + "timestamp": "2025-10-01 03:21:58.624067", + "step": 859, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:58.671218", + "step": 859, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0777149349451065, + "timestamp": "2025-10-01 03:21:58.704242", + "step": 860, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.752131", + "step": 860, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043440546840429306, + "timestamp": "2025-10-01 03:21:58.763962", + "step": 861, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.798095", + "step": 861, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01554486807435751, + "timestamp": "2025-10-01 03:21:58.810215", + "step": 862, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:58.860646", + "step": 862, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07988528907299042, + "timestamp": "2025-10-01 03:21:58.873944", + "step": 863, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.912274", + "step": 863, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024087190628051758, + "timestamp": "2025-10-01 03:21:58.944228", + "step": 864, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:58.986242", + "step": 864, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03959989547729492, + "timestamp": "2025-10-01 03:21:58.990012", + "step": 865, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.039445", + "step": 865, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025090834125876427, + "timestamp": "2025-10-01 03:21:59.048851", + "step": 866, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.095155", + "step": 866, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03390207141637802, + "timestamp": "2025-10-01 03:21:59.105561", + "step": 867, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.160662", + "step": 867, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045410849153995514, + "timestamp": "2025-10-01 03:21:59.190992", + "step": 868, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.240882", + "step": 868, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05709913745522499, + "timestamp": "2025-10-01 03:21:59.256880", + "step": 869, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.307528", + "step": 869, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06347237527370453, + "timestamp": "2025-10-01 03:21:59.311749", + "step": 870, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.354976", + "step": 870, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015801481902599335, + "timestamp": "2025-10-01 03:21:59.366189", + "step": 871, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.406678", + "step": 871, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013010934926569462, + "timestamp": "2025-10-01 03:21:59.438511", + "step": 872, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:21:59.485066", + "step": 872, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.062039874494075775, + "timestamp": "2025-10-01 03:21:59.490246", + "step": 873, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.533453", + "step": 873, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017869090661406517, + "timestamp": "2025-10-01 03:21:59.543584", + "step": 874, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.589820", + "step": 874, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02347574010491371, + "timestamp": "2025-10-01 03:21:59.602897", + "step": 875, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.655387", + "step": 875, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04312342032790184, + "timestamp": "2025-10-01 03:21:59.688014", + "step": 876, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:21:59.734960", + "step": 876, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03925793245434761, + "timestamp": "2025-10-01 03:21:59.747765", + "step": 877, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.791497", + "step": 877, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02745692990720272, + "timestamp": "2025-10-01 03:21:59.801168", + "step": 878, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.848654", + "step": 878, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020897606387734413, + "timestamp": "2025-10-01 03:21:59.858305", + "step": 879, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.899586", + "step": 879, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04229297861456871, + "timestamp": "2025-10-01 03:21:59.932150", + "step": 880, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:21:59.985901", + "step": 880, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030389610677957535, + "timestamp": "2025-10-01 03:21:59.995395", + "step": 881, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.049328", + "step": 881, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039970044046640396, + "timestamp": "2025-10-01 03:22:00.053116", + "step": 882, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.103274", + "step": 882, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035105857998132706, + "timestamp": "2025-10-01 03:22:00.116852", + "step": 883, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.156845", + "step": 883, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023668905720114708, + "timestamp": "2025-10-01 03:22:00.191862", + "step": 884, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.243937", + "step": 884, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020946675911545753, + "timestamp": "2025-10-01 03:22:00.256761", + "step": 885, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.298596", + "step": 885, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02345171943306923, + "timestamp": "2025-10-01 03:22:00.308527", + "step": 886, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.348608", + "step": 886, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03013618476688862, + "timestamp": "2025-10-01 03:22:00.359347", + "step": 887, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.405248", + "step": 887, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04995955154299736, + "timestamp": "2025-10-01 03:22:00.439069", + "step": 888, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.474791", + "step": 888, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04958776757121086, + "timestamp": "2025-10-01 03:22:00.487748", + "step": 889, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.534547", + "step": 889, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007550073321908712, + "timestamp": "2025-10-01 03:22:00.537498", + "step": 890, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-10-01 03:22:00.597926", + "step": 890, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06636320054531097, + "timestamp": "2025-10-01 03:22:00.609254", + "step": 891, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.652328", + "step": 891, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010231428779661655, + "timestamp": "2025-10-01 03:22:00.685818", + "step": 892, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.721287", + "step": 892, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01714364066720009, + "timestamp": "2025-10-01 03:22:00.732634", + "step": 893, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.780429", + "step": 893, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040170978754758835, + "timestamp": "2025-10-01 03:22:00.792911", + "step": 894, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.839605", + "step": 894, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05247178301215172, + "timestamp": "2025-10-01 03:22:00.850938", + "step": 895, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.891990", + "step": 895, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04197350889444351, + "timestamp": "2025-10-01 03:22:00.926495", + "step": 896, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:00.966762", + "step": 896, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0385713167488575, + "timestamp": "2025-10-01 03:22:00.977976", + "step": 897, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:01.017768", + "step": 897, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05433339625597, + "timestamp": "2025-10-01 03:22:01.029265", + "step": 898, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.070091", + "step": 898, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05092743784189224, + "timestamp": "2025-10-01 03:22:01.082170", + "step": 899, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.128447", + "step": 899, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03390706330537796, + "timestamp": "2025-10-01 03:22:01.161727", + "step": 900, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.206472", + "step": 900, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013444527983665466, + "timestamp": "2025-10-01 03:22:01.218853", + "step": 901, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.253553", + "step": 901, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03218306228518486, + "timestamp": "2025-10-01 03:22:01.266681", + "step": 902, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.307161", + "step": 902, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02500213123857975, + "timestamp": "2025-10-01 03:22:01.314106", + "step": 903, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.365262", + "step": 903, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02131069079041481, + "timestamp": "2025-10-01 03:22:01.399637", + "step": 904, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.442032", + "step": 904, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018994351848959923, + "timestamp": "2025-10-01 03:22:01.454630", + "step": 905, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.503593", + "step": 905, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035498786717653275, + "timestamp": "2025-10-01 03:22:01.515323", + "step": 906, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.554843", + "step": 906, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041818685829639435, + "timestamp": "2025-10-01 03:22:01.559915", + "step": 907, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.600551", + "step": 907, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008269102312624454, + "timestamp": "2025-10-01 03:22:01.625460", + "step": 908, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.658376", + "step": 908, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04474278539419174, + "timestamp": "2025-10-01 03:22:01.661332", + "step": 909, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.702264", + "step": 909, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036643292754888535, + "timestamp": "2025-10-01 03:22:01.715497", + "step": 910, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:01.758758", + "step": 910, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027579236775636673, + "timestamp": "2025-10-01 03:22:01.778017", + "step": 911, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:01.818496", + "step": 911, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010172717273235321, + "timestamp": "2025-10-01 03:22:01.843480", + "step": 912, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:22:04.996136", + "step": 912, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2544034.168225097, + "timestamp": "2025-10-01 03:22:05.008251", + "step": 912, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.044768", + "step": 912, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038109153509140015, + "timestamp": "2025-10-01 03:22:05.055911", + "step": 913, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.096576", + "step": 913, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03052874468266964, + "timestamp": "2025-10-01 03:22:05.099416", + "step": 914, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:05.140033", + "step": 914, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022531241178512573, + "timestamp": "2025-10-01 03:22:05.142762", + "step": 915, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:05.184374", + "step": 915, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030626075342297554, + "timestamp": "2025-10-01 03:22:05.215201", + "step": 916, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.258393", + "step": 916, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011968119069933891, + "timestamp": "2025-10-01 03:22:05.269520", + "step": 917, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.318937", + "step": 917, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042155828326940536, + "timestamp": "2025-10-01 03:22:05.330057", + "step": 918, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.381247", + "step": 918, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01654360257089138, + "timestamp": "2025-10-01 03:22:05.385398", + "step": 919, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.432879", + "step": 919, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04725457355380058, + "timestamp": "2025-10-01 03:22:05.467440", + "step": 920, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.513869", + "step": 920, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051677584648132324, + "timestamp": "2025-10-01 03:22:05.524918", + "step": 921, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.571873", + "step": 921, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016824636608362198, + "timestamp": "2025-10-01 03:22:05.582251", + "step": 922, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.617285", + "step": 922, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04348187893629074, + "timestamp": "2025-10-01 03:22:05.628589", + "step": 923, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:05.677935", + "step": 923, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021238666027784348, + "timestamp": "2025-10-01 03:22:05.708921", + "step": 924, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:05.758297", + "step": 924, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03334041312336922, + "timestamp": "2025-10-01 03:22:05.762026", + "step": 925, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.810259", + "step": 925, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014025497250258923, + "timestamp": "2025-10-01 03:22:05.818885", + "step": 926, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.861006", + "step": 926, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04412190243601799, + "timestamp": "2025-10-01 03:22:05.870522", + "step": 927, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.911935", + "step": 927, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0445001982152462, + "timestamp": "2025-10-01 03:22:05.943979", + "step": 928, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:05.996982", + "step": 928, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03661196678876877, + "timestamp": "2025-10-01 03:22:06.008764", + "step": 929, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.063700", + "step": 929, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04730541631579399, + "timestamp": "2025-10-01 03:22:06.067862", + "step": 930, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.109742", + "step": 930, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012386140413582325, + "timestamp": "2025-10-01 03:22:06.124115", + "step": 931, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.168226", + "step": 931, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07179244607686996, + "timestamp": "2025-10-01 03:22:06.200370", + "step": 932, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.241757", + "step": 932, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02282717265188694, + "timestamp": "2025-10-01 03:22:06.252751", + "step": 933, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.296028", + "step": 933, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020878612995147705, + "timestamp": "2025-10-01 03:22:06.307736", + "step": 934, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.349679", + "step": 934, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033629365265369415, + "timestamp": "2025-10-01 03:22:06.360127", + "step": 935, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.416243", + "step": 935, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05612532049417496, + "timestamp": "2025-10-01 03:22:06.451364", + "step": 936, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.492353", + "step": 936, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031821493059396744, + "timestamp": "2025-10-01 03:22:06.506657", + "step": 937, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.559807", + "step": 937, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04422537609934807, + "timestamp": "2025-10-01 03:22:06.575037", + "step": 938, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.626549", + "step": 938, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030889330431818962, + "timestamp": "2025-10-01 03:22:06.633497", + "step": 939, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.691139", + "step": 939, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0336841382086277, + "timestamp": "2025-10-01 03:22:06.723010", + "step": 940, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.770752", + "step": 940, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029928449541330338, + "timestamp": "2025-10-01 03:22:06.775284", + "step": 941, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.825693", + "step": 941, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023427974432706833, + "timestamp": "2025-10-01 03:22:06.835272", + "step": 942, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.885585", + "step": 942, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04927228018641472, + "timestamp": "2025-10-01 03:22:06.889041", + "step": 943, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:06.933007", + "step": 943, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01578955538570881, + "timestamp": "2025-10-01 03:22:06.960914", + "step": 944, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.008379", + "step": 944, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017623601481318474, + "timestamp": "2025-10-01 03:22:07.015183", + "step": 945, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:07.055966", + "step": 945, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01468904409557581, + "timestamp": "2025-10-01 03:22:07.063681", + "step": 946, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:07.110956", + "step": 946, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028890976682305336, + "timestamp": "2025-10-01 03:22:07.117452", + "step": 947, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:07.162357", + "step": 947, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05565451458096504, + "timestamp": "2025-10-01 03:22:07.192818", + "step": 948, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:07.232205", + "step": 948, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02603675052523613, + "timestamp": "2025-10-01 03:22:07.238703", + "step": 949, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.276927", + "step": 949, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026051847264170647, + "timestamp": "2025-10-01 03:22:07.284331", + "step": 950, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.324943", + "step": 950, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05518445000052452, + "timestamp": "2025-10-01 03:22:07.332996", + "step": 951, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:07.371300", + "step": 951, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041074950248003006, + "timestamp": "2025-10-01 03:22:07.400176", + "step": 952, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.439075", + "step": 952, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012918048538267612, + "timestamp": "2025-10-01 03:22:07.445393", + "step": 953, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.488384", + "step": 953, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0059538306668400764, + "timestamp": "2025-10-01 03:22:07.500319", + "step": 954, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.542887", + "step": 954, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030985703691840172, + "timestamp": "2025-10-01 03:22:07.546461", + "step": 955, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.583819", + "step": 955, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028188714757561684, + "timestamp": "2025-10-01 03:22:07.610996", + "step": 956, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.652170", + "step": 956, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04744136333465576, + "timestamp": "2025-10-01 03:22:07.660004", + "step": 957, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.700377", + "step": 957, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03218479081988335, + "timestamp": "2025-10-01 03:22:07.705400", + "step": 958, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.751220", + "step": 958, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020569559186697006, + "timestamp": "2025-10-01 03:22:07.758351", + "step": 959, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.806792", + "step": 959, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016243046149611473, + "timestamp": "2025-10-01 03:22:07.833021", + "step": 960, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:22:07.871366", + "step": 960, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017472652718424797, + "timestamp": "2025-10-01 03:22:07.879805", + "step": 961, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.919868", + "step": 961, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.044147852808237076, + "timestamp": "2025-10-01 03:22:07.925997", + "step": 962, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:07.966175", + "step": 962, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020259426906704903, + "timestamp": "2025-10-01 03:22:07.974638", + "step": 963, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.017008", + "step": 963, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03978366032242775, + "timestamp": "2025-10-01 03:22:08.052084", + "step": 964, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.097256", + "step": 964, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0630212128162384, + "timestamp": "2025-10-01 03:22:08.107956", + "step": 965, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.162062", + "step": 965, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03003385290503502, + "timestamp": "2025-10-01 03:22:08.168797", + "step": 966, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:08.212946", + "step": 966, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.044034551829099655, + "timestamp": "2025-10-01 03:22:08.216260", + "step": 967, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.267660", + "step": 967, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016516435891389847, + "timestamp": "2025-10-01 03:22:08.296493", + "step": 968, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.339579", + "step": 968, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026635563001036644, + "timestamp": "2025-10-01 03:22:08.347026", + "step": 969, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.388688", + "step": 969, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011871963739395142, + "timestamp": "2025-10-01 03:22:08.395809", + "step": 970, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.441367", + "step": 970, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026475820690393448, + "timestamp": "2025-10-01 03:22:08.448787", + "step": 971, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:08.487755", + "step": 971, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014591134153306484, + "timestamp": "2025-10-01 03:22:08.516190", + "step": 972, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.560414", + "step": 972, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.10635096579790115, + "timestamp": "2025-10-01 03:22:08.570037", + "step": 973, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.609655", + "step": 973, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040160201489925385, + "timestamp": "2025-10-01 03:22:08.618976", + "step": 974, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.657713", + "step": 974, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016089702025055885, + "timestamp": "2025-10-01 03:22:08.671619", + "step": 975, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.715631", + "step": 975, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07419993728399277, + "timestamp": "2025-10-01 03:22:08.748490", + "step": 976, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.791625", + "step": 976, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03740239888429642, + "timestamp": "2025-10-01 03:22:08.805233", + "step": 977, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.846551", + "step": 977, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04611891880631447, + "timestamp": "2025-10-01 03:22:08.858379", + "step": 978, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:08.897112", + "step": 978, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016859842464327812, + "timestamp": "2025-10-01 03:22:08.905403", + "step": 979, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:08.946623", + "step": 979, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038674719631671906, + "timestamp": "2025-10-01 03:22:08.977735", + "step": 980, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.017148", + "step": 980, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003023758064955473, + "timestamp": "2025-10-01 03:22:09.028028", + "step": 981, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.075421", + "step": 981, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02599949948489666, + "timestamp": "2025-10-01 03:22:09.088624", + "step": 982, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.131108", + "step": 982, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018400395289063454, + "timestamp": "2025-10-01 03:22:09.135546", + "step": 983, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.184722", + "step": 983, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0244524534791708, + "timestamp": "2025-10-01 03:22:09.210494", + "step": 984, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.245249", + "step": 984, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03263118490576744, + "timestamp": "2025-10-01 03:22:09.258271", + "step": 985, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:09.305980", + "step": 985, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04077742248773575, + "timestamp": "2025-10-01 03:22:09.316255", + "step": 986, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.356510", + "step": 986, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010660155676305294, + "timestamp": "2025-10-01 03:22:09.365195", + "step": 987, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.405733", + "step": 987, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028138911351561546, + "timestamp": "2025-10-01 03:22:09.431155", + "step": 988, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.471451", + "step": 988, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05787266418337822, + "timestamp": "2025-10-01 03:22:09.475804", + "step": 989, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.524071", + "step": 989, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04327806830406189, + "timestamp": "2025-10-01 03:22:09.535177", + "step": 990, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:09.579570", + "step": 990, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015106426551938057, + "timestamp": "2025-10-01 03:22:09.589096", + "step": 991, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:09.649051", + "step": 991, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016044601798057556, + "timestamp": "2025-10-01 03:22:09.682302", + "step": 992, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.725292", + "step": 992, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06651883572340012, + "timestamp": "2025-10-01 03:22:09.738650", + "step": 993, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.782029", + "step": 993, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03548578917980194, + "timestamp": "2025-10-01 03:22:09.794744", + "step": 994, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.837920", + "step": 994, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02808523178100586, + "timestamp": "2025-10-01 03:22:09.847586", + "step": 995, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:09.893754", + "step": 995, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005928338039666414, + "timestamp": "2025-10-01 03:22:09.928364", + "step": 996, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:09.974466", + "step": 996, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0676623210310936, + "timestamp": "2025-10-01 03:22:09.985838", + "step": 997, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:10.041426", + "step": 997, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005860020872205496, + "timestamp": "2025-10-01 03:22:10.054814", + "step": 998, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:10.099613", + "step": 998, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041421327739953995, + "timestamp": "2025-10-01 03:22:10.110905", + "step": 999, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:10.167738", + "step": 999, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021903011947870255, + "timestamp": "2025-10-01 03:22:10.200310", + "step": 1000, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 1000", + "timestamp": "2025-10-01 03:22:15.379612", + "step": 1000, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.421307", + "step": 1000, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035041846334934235, + "timestamp": "2025-10-01 03:22:15.430250", + "step": 1001, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.467373", + "step": 1001, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04962441697716713, + "timestamp": "2025-10-01 03:22:15.470771", + "step": 1002, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.508404", + "step": 1002, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038658447563648224, + "timestamp": "2025-10-01 03:22:15.518543", + "step": 1003, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.558477", + "step": 1003, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03604857996106148, + "timestamp": "2025-10-01 03:22:15.584061", + "step": 1004, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.630921", + "step": 1004, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04416282847523689, + "timestamp": "2025-10-01 03:22:15.644100", + "step": 1005, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.678921", + "step": 1005, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03594904765486717, + "timestamp": "2025-10-01 03:22:15.690378", + "step": 1006, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.724373", + "step": 1006, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014638684689998627, + "timestamp": "2025-10-01 03:22:15.738106", + "step": 1007, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.785159", + "step": 1007, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022938353940844536, + "timestamp": "2025-10-01 03:22:15.817972", + "step": 1008, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.859232", + "step": 1008, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04478529840707779, + "timestamp": "2025-10-01 03:22:15.865775", + "step": 1009, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.905040", + "step": 1009, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040781356394290924, + "timestamp": "2025-10-01 03:22:15.914430", + "step": 1010, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:15.954227", + "step": 1010, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03544432297348976, + "timestamp": "2025-10-01 03:22:15.963741", + "step": 1011, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.002866", + "step": 1011, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025107089430093765, + "timestamp": "2025-10-01 03:22:16.037521", + "step": 1012, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.078436", + "step": 1012, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04774821922183037, + "timestamp": "2025-10-01 03:22:16.086900", + "step": 1013, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:16.133472", + "step": 1013, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0286965724080801, + "timestamp": "2025-10-01 03:22:16.140696", + "step": 1014, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.191362", + "step": 1014, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0285301823168993, + "timestamp": "2025-10-01 03:22:16.196269", + "step": 1015, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.231377", + "step": 1015, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04680923372507095, + "timestamp": "2025-10-01 03:22:16.257349", + "step": 1016, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:16.304070", + "step": 1016, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07847265154123306, + "timestamp": "2025-10-01 03:22:16.314349", + "step": 1017, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.363149", + "step": 1017, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022073904052376747, + "timestamp": "2025-10-01 03:22:16.373134", + "step": 1018, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.429563", + "step": 1018, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018719034269452095, + "timestamp": "2025-10-01 03:22:16.435496", + "step": 1019, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:16.471063", + "step": 1019, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029562121257185936, + "timestamp": "2025-10-01 03:22:16.499285", + "step": 1020, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.532603", + "step": 1020, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0209902785718441, + "timestamp": "2025-10-01 03:22:16.537565", + "step": 1021, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.573358", + "step": 1021, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039341557770967484, + "timestamp": "2025-10-01 03:22:16.581632", + "step": 1022, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.630790", + "step": 1022, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031009327620267868, + "timestamp": "2025-10-01 03:22:16.638316", + "step": 1023, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.691568", + "step": 1023, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021748827770352364, + "timestamp": "2025-10-01 03:22:16.717764", + "step": 1024, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.758670", + "step": 1024, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02673359028995037, + "timestamp": "2025-10-01 03:22:16.763318", + "step": 1025, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.802578", + "step": 1025, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03179049864411354, + "timestamp": "2025-10-01 03:22:16.805637", + "step": 1026, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:16.843472", + "step": 1026, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023841777816414833, + "timestamp": "2025-10-01 03:22:16.853521", + "step": 1027, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.900907", + "step": 1027, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02070223167538643, + "timestamp": "2025-10-01 03:22:16.927387", + "step": 1028, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:16.968600", + "step": 1028, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028673233464360237, + "timestamp": "2025-10-01 03:22:16.980632", + "step": 1029, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.034916", + "step": 1029, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01134380605071783, + "timestamp": "2025-10-01 03:22:17.044195", + "step": 1030, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.083816", + "step": 1030, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026747511699795723, + "timestamp": "2025-10-01 03:22:17.091585", + "step": 1031, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.133668", + "step": 1031, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011981712654232979, + "timestamp": "2025-10-01 03:22:17.164142", + "step": 1032, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:17.208866", + "step": 1032, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047372523695230484, + "timestamp": "2025-10-01 03:22:17.215343", + "step": 1033, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.253119", + "step": 1033, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03212440758943558, + "timestamp": "2025-10-01 03:22:17.256441", + "step": 1034, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.299163", + "step": 1034, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029276546090841293, + "timestamp": "2025-10-01 03:22:17.303777", + "step": 1035, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.347696", + "step": 1035, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010318256914615631, + "timestamp": "2025-10-01 03:22:17.378430", + "step": 1036, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.429729", + "step": 1036, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.050353724509477615, + "timestamp": "2025-10-01 03:22:17.434743", + "step": 1037, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:17.477554", + "step": 1037, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032142557203769684, + "timestamp": "2025-10-01 03:22:17.484270", + "step": 1038, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.522195", + "step": 1038, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03695840761065483, + "timestamp": "2025-10-01 03:22:17.524958", + "step": 1039, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.567694", + "step": 1039, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03866345062851906, + "timestamp": "2025-10-01 03:22:17.595026", + "step": 1040, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.629442", + "step": 1040, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06269795447587967, + "timestamp": "2025-10-01 03:22:17.633029", + "step": 1041, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:17.668411", + "step": 1041, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04989275708794594, + "timestamp": "2025-10-01 03:22:17.673020", + "step": 1042, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.707773", + "step": 1042, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04997537285089493, + "timestamp": "2025-10-01 03:22:17.715267", + "step": 1043, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.754983", + "step": 1043, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014689329080283642, + "timestamp": "2025-10-01 03:22:17.781147", + "step": 1044, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.824723", + "step": 1044, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03237932547926903, + "timestamp": "2025-10-01 03:22:17.830893", + "step": 1045, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.869169", + "step": 1045, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015184581279754639, + "timestamp": "2025-10-01 03:22:17.875708", + "step": 1046, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.909305", + "step": 1046, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0172930508852005, + "timestamp": "2025-10-01 03:22:17.915712", + "step": 1047, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:17.954524", + "step": 1047, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04932786151766777, + "timestamp": "2025-10-01 03:22:17.986760", + "step": 1048, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.029237", + "step": 1048, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040225785225629807, + "timestamp": "2025-10-01 03:22:18.038499", + "step": 1049, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.081504", + "step": 1049, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016568774357438087, + "timestamp": "2025-10-01 03:22:18.085292", + "step": 1050, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:18.131875", + "step": 1050, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0409993901848793, + "timestamp": "2025-10-01 03:22:18.142044", + "step": 1051, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.183767", + "step": 1051, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04840872064232826, + "timestamp": "2025-10-01 03:22:18.208443", + "step": 1052, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.245027", + "step": 1052, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019608836621046066, + "timestamp": "2025-10-01 03:22:18.251210", + "step": 1053, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:18.289892", + "step": 1053, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02013350836932659, + "timestamp": "2025-10-01 03:22:18.295820", + "step": 1054, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.341328", + "step": 1054, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08478923887014389, + "timestamp": "2025-10-01 03:22:18.345071", + "step": 1055, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.383449", + "step": 1055, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019059915095567703, + "timestamp": "2025-10-01 03:22:18.413440", + "step": 1056, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.457900", + "step": 1056, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026907071471214294, + "timestamp": "2025-10-01 03:22:18.462359", + "step": 1057, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.497308", + "step": 1057, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04971792548894882, + "timestamp": "2025-10-01 03:22:18.507126", + "step": 1058, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.546209", + "step": 1058, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033201828598976135, + "timestamp": "2025-10-01 03:22:18.557194", + "step": 1059, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.613522", + "step": 1059, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046558916568756104, + "timestamp": "2025-10-01 03:22:18.647889", + "step": 1060, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.697535", + "step": 1060, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0330783948302269, + "timestamp": "2025-10-01 03:22:18.708522", + "step": 1061, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.749425", + "step": 1061, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05252961441874504, + "timestamp": "2025-10-01 03:22:18.753868", + "step": 1062, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.805338", + "step": 1062, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06015091761946678, + "timestamp": "2025-10-01 03:22:18.811516", + "step": 1063, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:18.857958", + "step": 1063, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027548322454094887, + "timestamp": "2025-10-01 03:22:18.889585", + "step": 1064, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:22:21.794584", + "step": 1064, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2457449.4824426733, + "timestamp": "2025-10-01 03:22:21.799266", + "step": 1064, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:21.832320", + "step": 1064, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029412319883704185, + "timestamp": "2025-10-01 03:22:21.836750", + "step": 1065, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:21.879324", + "step": 1065, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04567786306142807, + "timestamp": "2025-10-01 03:22:21.891110", + "step": 1066, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:21.930953", + "step": 1066, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009278521873056889, + "timestamp": "2025-10-01 03:22:21.942848", + "step": 1067, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:21.985757", + "step": 1067, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029525283724069595, + "timestamp": "2025-10-01 03:22:22.012469", + "step": 1068, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.047443", + "step": 1068, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001075760810635984, + "timestamp": "2025-10-01 03:22:22.056778", + "step": 1069, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.098421", + "step": 1069, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008873596787452698, + "timestamp": "2025-10-01 03:22:22.105424", + "step": 1070, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:22.144146", + "step": 1070, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02858029678463936, + "timestamp": "2025-10-01 03:22:22.150854", + "step": 1071, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.189207", + "step": 1071, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012302844785153866, + "timestamp": "2025-10-01 03:22:22.217041", + "step": 1072, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.254787", + "step": 1072, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04618891701102257, + "timestamp": "2025-10-01 03:22:22.262423", + "step": 1073, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.308268", + "step": 1073, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02010185457766056, + "timestamp": "2025-10-01 03:22:22.313474", + "step": 1074, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.352317", + "step": 1074, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04843506962060928, + "timestamp": "2025-10-01 03:22:22.361149", + "step": 1075, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.402346", + "step": 1075, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.10020335018634796, + "timestamp": "2025-10-01 03:22:22.431216", + "step": 1076, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.473966", + "step": 1076, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016758255660533905, + "timestamp": "2025-10-01 03:22:22.480074", + "step": 1077, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.524092", + "step": 1077, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02767782472074032, + "timestamp": "2025-10-01 03:22:22.528006", + "step": 1078, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:22.563986", + "step": 1078, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022486966103315353, + "timestamp": "2025-10-01 03:22:22.568532", + "step": 1079, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.603382", + "step": 1079, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030768131837248802, + "timestamp": "2025-10-01 03:22:22.630554", + "step": 1080, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.669227", + "step": 1080, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01388077437877655, + "timestamp": "2025-10-01 03:22:22.673450", + "step": 1081, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.715197", + "step": 1081, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03797846660017967, + "timestamp": "2025-10-01 03:22:22.720468", + "step": 1082, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.755473", + "step": 1082, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014572947286069393, + "timestamp": "2025-10-01 03:22:22.762919", + "step": 1083, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.799893", + "step": 1083, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018778393045067787, + "timestamp": "2025-10-01 03:22:22.825247", + "step": 1084, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.859723", + "step": 1084, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023885788396000862, + "timestamp": "2025-10-01 03:22:22.867449", + "step": 1085, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:22.905151", + "step": 1085, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024863291531801224, + "timestamp": "2025-10-01 03:22:22.914089", + "step": 1086, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.948344", + "step": 1086, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014569595456123352, + "timestamp": "2025-10-01 03:22:22.952018", + "step": 1087, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:22.986258", + "step": 1087, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01764153502881527, + "timestamp": "2025-10-01 03:22:23.012784", + "step": 1088, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.048504", + "step": 1088, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010714457370340824, + "timestamp": "2025-10-01 03:22:23.051643", + "step": 1089, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.084732", + "step": 1089, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0325903482735157, + "timestamp": "2025-10-01 03:22:23.093858", + "step": 1090, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:23.140551", + "step": 1090, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006148893386125565, + "timestamp": "2025-10-01 03:22:23.150083", + "step": 1091, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.191172", + "step": 1091, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039594169706106186, + "timestamp": "2025-10-01 03:22:23.218385", + "step": 1092, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.260739", + "step": 1092, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024238359183073044, + "timestamp": "2025-10-01 03:22:23.269999", + "step": 1093, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.311914", + "step": 1093, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03776811808347702, + "timestamp": "2025-10-01 03:22:23.317103", + "step": 1094, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.359753", + "step": 1094, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.058106403797864914, + "timestamp": "2025-10-01 03:22:23.365912", + "step": 1095, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.400583", + "step": 1095, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022857408970594406, + "timestamp": "2025-10-01 03:22:23.425148", + "step": 1096, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.461008", + "step": 1096, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01170319877564907, + "timestamp": "2025-10-01 03:22:23.468427", + "step": 1097, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:23.506936", + "step": 1097, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03542212024331093, + "timestamp": "2025-10-01 03:22:23.510745", + "step": 1098, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.545175", + "step": 1098, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08582345396280289, + "timestamp": "2025-10-01 03:22:23.550813", + "step": 1099, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.586829", + "step": 1099, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03604473918676376, + "timestamp": "2025-10-01 03:22:23.612830", + "step": 1100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.646213", + "step": 1100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049609895795583725, + "timestamp": "2025-10-01 03:22:23.651944", + "step": 1101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.689297", + "step": 1101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03934171050786972, + "timestamp": "2025-10-01 03:22:23.694329", + "step": 1102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.740137", + "step": 1102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05229278653860092, + "timestamp": "2025-10-01 03:22:23.746440", + "step": 1103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:23.785279", + "step": 1103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04124576598405838, + "timestamp": "2025-10-01 03:22:23.810348", + "step": 1104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:23.852682", + "step": 1104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02873980812728405, + "timestamp": "2025-10-01 03:22:23.855462", + "step": 1105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:23.892165", + "step": 1105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05697495490312576, + "timestamp": "2025-10-01 03:22:23.899833", + "step": 1106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:23.944766", + "step": 1106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04573697969317436, + "timestamp": "2025-10-01 03:22:23.951966", + "step": 1107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:23.993300", + "step": 1107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009654120542109013, + "timestamp": "2025-10-01 03:22:24.025081", + "step": 1108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.061042", + "step": 1108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018448354676365852, + "timestamp": "2025-10-01 03:22:24.064579", + "step": 1109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.098626", + "step": 1109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021288201212882996, + "timestamp": "2025-10-01 03:22:24.100644", + "step": 1110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.132979", + "step": 1110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03859984874725342, + "timestamp": "2025-10-01 03:22:24.135698", + "step": 1111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.169229", + "step": 1111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029940148815512657, + "timestamp": "2025-10-01 03:22:24.197375", + "step": 1112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.234653", + "step": 1112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01797810010612011, + "timestamp": "2025-10-01 03:22:24.238996", + "step": 1113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.280678", + "step": 1113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034998584538698196, + "timestamp": "2025-10-01 03:22:24.284597", + "step": 1114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.332420", + "step": 1114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02848510444164276, + "timestamp": "2025-10-01 03:22:24.335170", + "step": 1115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.370591", + "step": 1115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021142849698662758, + "timestamp": "2025-10-01 03:22:24.397440", + "step": 1116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.435915", + "step": 1116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027680769562721252, + "timestamp": "2025-10-01 03:22:24.438297", + "step": 1117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.470171", + "step": 1117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018334034830331802, + "timestamp": "2025-10-01 03:22:24.472320", + "step": 1118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:24.506775", + "step": 1118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03482576459646225, + "timestamp": "2025-10-01 03:22:24.511959", + "step": 1119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:24.548684", + "step": 1119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04804804548621178, + "timestamp": "2025-10-01 03:22:24.574892", + "step": 1120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:24.607558", + "step": 1120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06256788223981857, + "timestamp": "2025-10-01 03:22:24.609855", + "step": 1121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.654836", + "step": 1121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.048247747123241425, + "timestamp": "2025-10-01 03:22:24.657217", + "step": 1122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.691676", + "step": 1122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02656741626560688, + "timestamp": "2025-10-01 03:22:24.693828", + "step": 1123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.733875", + "step": 1123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01877322606742382, + "timestamp": "2025-10-01 03:22:24.757613", + "step": 1124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:24.790141", + "step": 1124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06559600681066513, + "timestamp": "2025-10-01 03:22:24.792205", + "step": 1125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.834944", + "step": 1125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02373730018734932, + "timestamp": "2025-10-01 03:22:24.836925", + "step": 1126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.870265", + "step": 1126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00947795994579792, + "timestamp": "2025-10-01 03:22:24.872342", + "step": 1127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.904559", + "step": 1127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02639543078839779, + "timestamp": "2025-10-01 03:22:24.928021", + "step": 1128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:24.968874", + "step": 1128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02415826916694641, + "timestamp": "2025-10-01 03:22:24.971087", + "step": 1129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:25.005578", + "step": 1129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040283214300870895, + "timestamp": "2025-10-01 03:22:25.007806", + "step": 1130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.041283", + "step": 1130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025431832298636436, + "timestamp": "2025-10-01 03:22:25.050365", + "step": 1131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.089336", + "step": 1131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018612945452332497, + "timestamp": "2025-10-01 03:22:25.113403", + "step": 1132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.150754", + "step": 1132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018524030223488808, + "timestamp": "2025-10-01 03:22:25.154311", + "step": 1133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.190037", + "step": 1133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036666952073574066, + "timestamp": "2025-10-01 03:22:25.193732", + "step": 1134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.230064", + "step": 1134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028593959286808968, + "timestamp": "2025-10-01 03:22:25.233186", + "step": 1135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.266570", + "step": 1135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04811309650540352, + "timestamp": "2025-10-01 03:22:25.299972", + "step": 1136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:25.331350", + "step": 1136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03014993481338024, + "timestamp": "2025-10-01 03:22:25.333926", + "step": 1137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.376505", + "step": 1137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04123203083872795, + "timestamp": "2025-10-01 03:22:25.378916", + "step": 1138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.414635", + "step": 1138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01571117714047432, + "timestamp": "2025-10-01 03:22:25.416593", + "step": 1139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.452476", + "step": 1139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05377236381173134, + "timestamp": "2025-10-01 03:22:25.476141", + "step": 1140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.509041", + "step": 1140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03832404688000679, + "timestamp": "2025-10-01 03:22:25.511298", + "step": 1141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.545594", + "step": 1141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05219293013215065, + "timestamp": "2025-10-01 03:22:25.547577", + "step": 1142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.580345", + "step": 1142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018502669408917427, + "timestamp": "2025-10-01 03:22:25.582417", + "step": 1143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.615520", + "step": 1143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017837882041931152, + "timestamp": "2025-10-01 03:22:25.639054", + "step": 1144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.680704", + "step": 1144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005946934223175049, + "timestamp": "2025-10-01 03:22:25.682705", + "step": 1145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.715584", + "step": 1145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014376145787537098, + "timestamp": "2025-10-01 03:22:25.717740", + "step": 1146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.750969", + "step": 1146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022504502907395363, + "timestamp": "2025-10-01 03:22:25.753083", + "step": 1147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.786265", + "step": 1147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03458306938409805, + "timestamp": "2025-10-01 03:22:25.809948", + "step": 1148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.852657", + "step": 1148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008610841818153858, + "timestamp": "2025-10-01 03:22:25.854741", + "step": 1149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.898156", + "step": 1149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003959666006267071, + "timestamp": "2025-10-01 03:22:25.900335", + "step": 1150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.934374", + "step": 1150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03751297667622566, + "timestamp": "2025-10-01 03:22:25.936399", + "step": 1151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:25.970848", + "step": 1151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07588804513216019, + "timestamp": "2025-10-01 03:22:25.994613", + "step": 1152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.033660", + "step": 1152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06934117525815964, + "timestamp": "2025-10-01 03:22:26.035839", + "step": 1153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.069093", + "step": 1153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06772889941930771, + "timestamp": "2025-10-01 03:22:26.071042", + "step": 1154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.108050", + "step": 1154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06890495121479034, + "timestamp": "2025-10-01 03:22:26.110100", + "step": 1155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.143879", + "step": 1155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025469569489359856, + "timestamp": "2025-10-01 03:22:26.167534", + "step": 1156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.211663", + "step": 1156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028119510039687157, + "timestamp": "2025-10-01 03:22:26.213772", + "step": 1157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.256670", + "step": 1157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04479093477129936, + "timestamp": "2025-10-01 03:22:26.258786", + "step": 1158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:26.303179", + "step": 1158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01654881425201893, + "timestamp": "2025-10-01 03:22:26.305279", + "step": 1159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:26.337432", + "step": 1159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01394333690404892, + "timestamp": "2025-10-01 03:22:26.361054", + "step": 1160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.405765", + "step": 1160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046003084629774094, + "timestamp": "2025-10-01 03:22:26.408311", + "step": 1161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.462412", + "step": 1161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022123966366052628, + "timestamp": "2025-10-01 03:22:26.464420", + "step": 1162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.500207", + "step": 1162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011430291458964348, + "timestamp": "2025-10-01 03:22:26.502287", + "step": 1163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.540313", + "step": 1163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010798372328281403, + "timestamp": "2025-10-01 03:22:26.563831", + "step": 1164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:26.601543", + "step": 1164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017313113436102867, + "timestamp": "2025-10-01 03:22:26.605541", + "step": 1165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.640121", + "step": 1165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03441449627280235, + "timestamp": "2025-10-01 03:22:26.642260", + "step": 1166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:26.677805", + "step": 1166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021350370720028877, + "timestamp": "2025-10-01 03:22:26.679814", + "step": 1167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.717226", + "step": 1167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03246111795306206, + "timestamp": "2025-10-01 03:22:26.740786", + "step": 1168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:26.783095", + "step": 1168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06401068717241287, + "timestamp": "2025-10-01 03:22:26.785180", + "step": 1169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.819356", + "step": 1169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011304757557809353, + "timestamp": "2025-10-01 03:22:26.821517", + "step": 1170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.854906", + "step": 1170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020581407472491264, + "timestamp": "2025-10-01 03:22:26.857035", + "step": 1171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.891351", + "step": 1171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04098847135901451, + "timestamp": "2025-10-01 03:22:26.915549", + "step": 1172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:26.963247", + "step": 1172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028973450884222984, + "timestamp": "2025-10-01 03:22:26.965204", + "step": 1173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.010827", + "step": 1173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012964311055839062, + "timestamp": "2025-10-01 03:22:27.013448", + "step": 1174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.053711", + "step": 1174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008269923739135265, + "timestamp": "2025-10-01 03:22:27.055977", + "step": 1175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.088906", + "step": 1175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024838488548994064, + "timestamp": "2025-10-01 03:22:27.112709", + "step": 1176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.155395", + "step": 1176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.059442441910505295, + "timestamp": "2025-10-01 03:22:27.157702", + "step": 1177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:27.190544", + "step": 1177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014873544685542583, + "timestamp": "2025-10-01 03:22:27.192740", + "step": 1178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:22:27.253564", + "step": 1178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05689582601189613, + "timestamp": "2025-10-01 03:22:27.257092", + "step": 1179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.305961", + "step": 1179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04023022577166557, + "timestamp": "2025-10-01 03:22:27.329566", + "step": 1180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.374429", + "step": 1180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021805519238114357, + "timestamp": "2025-10-01 03:22:27.376676", + "step": 1181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.423156", + "step": 1181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02039695903658867, + "timestamp": "2025-10-01 03:22:27.425301", + "step": 1182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.465108", + "step": 1182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01541866920888424, + "timestamp": "2025-10-01 03:22:27.467368", + "step": 1183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.521043", + "step": 1183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06592859327793121, + "timestamp": "2025-10-01 03:22:27.544574", + "step": 1184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.582098", + "step": 1184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.064814493060112, + "timestamp": "2025-10-01 03:22:27.584110", + "step": 1185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.638200", + "step": 1185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021942438557744026, + "timestamp": "2025-10-01 03:22:27.640222", + "step": 1186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.681611", + "step": 1186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04591352120041847, + "timestamp": "2025-10-01 03:22:27.683747", + "step": 1187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.728079", + "step": 1187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02521132491528988, + "timestamp": "2025-10-01 03:22:27.751711", + "step": 1188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.803194", + "step": 1188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03995806723833084, + "timestamp": "2025-10-01 03:22:27.806585", + "step": 1189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.846077", + "step": 1189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025819253642112017, + "timestamp": "2025-10-01 03:22:27.848495", + "step": 1190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.887356", + "step": 1190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054595042020082474, + "timestamp": "2025-10-01 03:22:27.889465", + "step": 1191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.932953", + "step": 1191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03117014467716217, + "timestamp": "2025-10-01 03:22:27.956836", + "step": 1192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:27.998524", + "step": 1192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03673925995826721, + "timestamp": "2025-10-01 03:22:28.000501", + "step": 1193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:28.046880", + "step": 1193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05317741259932518, + "timestamp": "2025-10-01 03:22:28.054200", + "step": 1194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:28.096534", + "step": 1194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06243967264890671, + "timestamp": "2025-10-01 03:22:28.101985", + "step": 1195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.152098", + "step": 1195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0378265343606472, + "timestamp": "2025-10-01 03:22:28.176231", + "step": 1196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.210293", + "step": 1196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.056114938110113144, + "timestamp": "2025-10-01 03:22:28.213543", + "step": 1197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:28.248098", + "step": 1197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0707237645983696, + "timestamp": "2025-10-01 03:22:28.250019", + "step": 1198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.282259", + "step": 1198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00982194859534502, + "timestamp": "2025-10-01 03:22:28.284601", + "step": 1199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.324332", + "step": 1199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01881309412419796, + "timestamp": "2025-10-01 03:22:28.347800", + "step": 1200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.379963", + "step": 1200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08469481766223907, + "timestamp": "2025-10-01 03:22:28.381939", + "step": 1201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.414198", + "step": 1201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022673126310110092, + "timestamp": "2025-10-01 03:22:28.417258", + "step": 1202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.453942", + "step": 1202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035369787365198135, + "timestamp": "2025-10-01 03:22:28.456030", + "step": 1203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.492237", + "step": 1203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04300442337989807, + "timestamp": "2025-10-01 03:22:28.515776", + "step": 1204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.548325", + "step": 1204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.048773471266031265, + "timestamp": "2025-10-01 03:22:28.550343", + "step": 1205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.583070", + "step": 1205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027133937925100327, + "timestamp": "2025-10-01 03:22:28.585288", + "step": 1206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:28.617546", + "step": 1206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008626206777989864, + "timestamp": "2025-10-01 03:22:28.619567", + "step": 1207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.656911", + "step": 1207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04126042127609253, + "timestamp": "2025-10-01 03:22:28.680450", + "step": 1208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.711631", + "step": 1208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032341118901968, + "timestamp": "2025-10-01 03:22:28.714851", + "step": 1209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.751271", + "step": 1209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03229150548577309, + "timestamp": "2025-10-01 03:22:28.753471", + "step": 1210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.793105", + "step": 1210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04059656709432602, + "timestamp": "2025-10-01 03:22:28.795273", + "step": 1211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.831585", + "step": 1211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02638952061533928, + "timestamp": "2025-10-01 03:22:28.855162", + "step": 1212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.901279", + "step": 1212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02631312981247902, + "timestamp": "2025-10-01 03:22:28.903521", + "step": 1213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.942055", + "step": 1213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0622260607779026, + "timestamp": "2025-10-01 03:22:28.944089", + "step": 1214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:28.976767", + "step": 1214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03060460090637207, + "timestamp": "2025-10-01 03:22:28.978721", + "step": 1215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:29.017929", + "step": 1215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04015004634857178, + "timestamp": "2025-10-01 03:22:29.042529", + "step": 1216, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:22:31.705565", + "step": 1216, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2354548.6954123653, + "timestamp": "2025-10-01 03:22:31.707835", + "step": 1216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:31.738388", + "step": 1216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02487489953637123, + "timestamp": "2025-10-01 03:22:31.740444", + "step": 1217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:31.773065", + "step": 1217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040479328483343124, + "timestamp": "2025-10-01 03:22:31.775293", + "step": 1218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:31.809652", + "step": 1218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024845736101269722, + "timestamp": "2025-10-01 03:22:31.811860", + "step": 1219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:31.847567", + "step": 1219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025400813668966293, + "timestamp": "2025-10-01 03:22:31.870866", + "step": 1220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:31.908932", + "step": 1220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03426255285739899, + "timestamp": "2025-10-01 03:22:31.910833", + "step": 1221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:31.944015", + "step": 1221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054713159799575806, + "timestamp": "2025-10-01 03:22:31.946072", + "step": 1222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:31.983571", + "step": 1222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035474181175231934, + "timestamp": "2025-10-01 03:22:31.985578", + "step": 1223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:32.017392", + "step": 1223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024791305884718895, + "timestamp": "2025-10-01 03:22:32.041164", + "step": 1224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.072437", + "step": 1224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031181517988443375, + "timestamp": "2025-10-01 03:22:32.074485", + "step": 1225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.105689", + "step": 1225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03466044366359711, + "timestamp": "2025-10-01 03:22:32.107975", + "step": 1226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.146223", + "step": 1226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027942409738898277, + "timestamp": "2025-10-01 03:22:32.148007", + "step": 1227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.179583", + "step": 1227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03277614340186119, + "timestamp": "2025-10-01 03:22:32.203586", + "step": 1228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.236598", + "step": 1228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03337693214416504, + "timestamp": "2025-10-01 03:22:32.238937", + "step": 1229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.270857", + "step": 1229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028508206829428673, + "timestamp": "2025-10-01 03:22:32.274824", + "step": 1230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.310248", + "step": 1230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040019165724515915, + "timestamp": "2025-10-01 03:22:32.312481", + "step": 1231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.346012", + "step": 1231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04319452866911888, + "timestamp": "2025-10-01 03:22:32.369762", + "step": 1232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:32.405105", + "step": 1232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06578938663005829, + "timestamp": "2025-10-01 03:22:32.407759", + "step": 1233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.457205", + "step": 1233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013287773355841637, + "timestamp": "2025-10-01 03:22:32.459266", + "step": 1234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.492408", + "step": 1234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006652533076703548, + "timestamp": "2025-10-01 03:22:32.495056", + "step": 1235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.534678", + "step": 1235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025147438049316406, + "timestamp": "2025-10-01 03:22:32.559360", + "step": 1236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:32.599368", + "step": 1236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027959927916526794, + "timestamp": "2025-10-01 03:22:32.602001", + "step": 1237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.646625", + "step": 1237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047662120312452316, + "timestamp": "2025-10-01 03:22:32.649862", + "step": 1238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:32.691291", + "step": 1238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04254603385925293, + "timestamp": "2025-10-01 03:22:32.693608", + "step": 1239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.728888", + "step": 1239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03164709359407425, + "timestamp": "2025-10-01 03:22:32.753050", + "step": 1240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.787304", + "step": 1240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04824475571513176, + "timestamp": "2025-10-01 03:22:32.789815", + "step": 1241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.826606", + "step": 1241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04141637310385704, + "timestamp": "2025-10-01 03:22:32.829027", + "step": 1242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.861021", + "step": 1242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05681506544351578, + "timestamp": "2025-10-01 03:22:32.863586", + "step": 1243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.895524", + "step": 1243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03531910106539726, + "timestamp": "2025-10-01 03:22:32.919441", + "step": 1244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.955696", + "step": 1244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041215237230062485, + "timestamp": "2025-10-01 03:22:32.958190", + "step": 1245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:32.992241", + "step": 1245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.057274580001831055, + "timestamp": "2025-10-01 03:22:32.994895", + "step": 1246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:22:33.030429", + "step": 1246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0460268072783947, + "timestamp": "2025-10-01 03:22:33.033267", + "step": 1247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:33.067987", + "step": 1247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039202362298965454, + "timestamp": "2025-10-01 03:22:33.091894", + "step": 1248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:33.123054", + "step": 1248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04567693546414375, + "timestamp": "2025-10-01 03:22:33.125641", + "step": 1249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.160574", + "step": 1249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03580895811319351, + "timestamp": "2025-10-01 03:22:33.162632", + "step": 1250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.193780", + "step": 1250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03637897968292236, + "timestamp": "2025-10-01 03:22:33.196081", + "step": 1251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.233934", + "step": 1251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06256627291440964, + "timestamp": "2025-10-01 03:22:33.259217", + "step": 1252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.293348", + "step": 1252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027571087703108788, + "timestamp": "2025-10-01 03:22:33.296415", + "step": 1253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.329346", + "step": 1253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.059160176664590836, + "timestamp": "2025-10-01 03:22:33.332088", + "step": 1254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.364426", + "step": 1254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034878868609666824, + "timestamp": "2025-10-01 03:22:33.366890", + "step": 1255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.398569", + "step": 1255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06210717558860779, + "timestamp": "2025-10-01 03:22:33.422598", + "step": 1256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.455063", + "step": 1256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06202268227934837, + "timestamp": "2025-10-01 03:22:33.457338", + "step": 1257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.488684", + "step": 1257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021956240758299828, + "timestamp": "2025-10-01 03:22:33.491190", + "step": 1258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.522444", + "step": 1258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01907072588801384, + "timestamp": "2025-10-01 03:22:33.525071", + "step": 1259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.560612", + "step": 1259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08131854236125946, + "timestamp": "2025-10-01 03:22:33.584886", + "step": 1260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.620965", + "step": 1260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03147944062948227, + "timestamp": "2025-10-01 03:22:33.622884", + "step": 1261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:33.654179", + "step": 1261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02509615384042263, + "timestamp": "2025-10-01 03:22:33.656921", + "step": 1262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.689519", + "step": 1262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03961382433772087, + "timestamp": "2025-10-01 03:22:33.692984", + "step": 1263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.731601", + "step": 1263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04661189764738083, + "timestamp": "2025-10-01 03:22:33.756749", + "step": 1264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.789417", + "step": 1264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023978782817721367, + "timestamp": "2025-10-01 03:22:33.792221", + "step": 1265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.826768", + "step": 1265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06288868933916092, + "timestamp": "2025-10-01 03:22:33.829064", + "step": 1266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:33.865878", + "step": 1266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016610167920589447, + "timestamp": "2025-10-01 03:22:33.868466", + "step": 1267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.899951", + "step": 1267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016808120533823967, + "timestamp": "2025-10-01 03:22:33.923935", + "step": 1268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.957035", + "step": 1268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020625237375497818, + "timestamp": "2025-10-01 03:22:33.959521", + "step": 1269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:33.990206", + "step": 1269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04204517975449562, + "timestamp": "2025-10-01 03:22:33.992643", + "step": 1270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.033214", + "step": 1270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023059885948896408, + "timestamp": "2025-10-01 03:22:34.036080", + "step": 1271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.072419", + "step": 1271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04602506011724472, + "timestamp": "2025-10-01 03:22:34.096469", + "step": 1272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.138761", + "step": 1272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05415388569235802, + "timestamp": "2025-10-01 03:22:34.141187", + "step": 1273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.172972", + "step": 1273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02436983585357666, + "timestamp": "2025-10-01 03:22:34.175409", + "step": 1274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:34.215561", + "step": 1274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007525506429374218, + "timestamp": "2025-10-01 03:22:34.217614", + "step": 1275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.249370", + "step": 1275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026064950972795486, + "timestamp": "2025-10-01 03:22:34.274247", + "step": 1276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.314409", + "step": 1276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01398260984569788, + "timestamp": "2025-10-01 03:22:34.316775", + "step": 1277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.348501", + "step": 1277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0717095360159874, + "timestamp": "2025-10-01 03:22:34.350635", + "step": 1278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.383961", + "step": 1278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029287056997418404, + "timestamp": "2025-10-01 03:22:34.386263", + "step": 1279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.417354", + "step": 1279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039847102016210556, + "timestamp": "2025-10-01 03:22:34.440925", + "step": 1280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:22:34.471394", + "step": 1280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017964264377951622, + "timestamp": "2025-10-01 03:22:34.473351", + "step": 1281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.506301", + "step": 1281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03005450777709484, + "timestamp": "2025-10-01 03:22:34.508277", + "step": 1282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:34.538693", + "step": 1282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03482417017221451, + "timestamp": "2025-10-01 03:22:34.540879", + "step": 1283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:34.573930", + "step": 1283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02992815151810646, + "timestamp": "2025-10-01 03:22:34.597588", + "step": 1284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.627931", + "step": 1284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035674113780260086, + "timestamp": "2025-10-01 03:22:34.630046", + "step": 1285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.661902", + "step": 1285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047208283096551895, + "timestamp": "2025-10-01 03:22:34.663889", + "step": 1286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.699722", + "step": 1286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0201954897493124, + "timestamp": "2025-10-01 03:22:34.702020", + "step": 1287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.736088", + "step": 1287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003943244926631451, + "timestamp": "2025-10-01 03:22:34.759604", + "step": 1288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.790521", + "step": 1288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0486120842397213, + "timestamp": "2025-10-01 03:22:34.792761", + "step": 1289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:34.832161", + "step": 1289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07320515811443329, + "timestamp": "2025-10-01 03:22:34.834032", + "step": 1290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:34.873182", + "step": 1290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08092663437128067, + "timestamp": "2025-10-01 03:22:34.875263", + "step": 1291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.906204", + "step": 1291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06909819692373276, + "timestamp": "2025-10-01 03:22:34.929675", + "step": 1292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:34.967650", + "step": 1292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02019626460969448, + "timestamp": "2025-10-01 03:22:34.970140", + "step": 1293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:35.003303", + "step": 1293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06644660979509354, + "timestamp": "2025-10-01 03:22:35.005233", + "step": 1294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.037377", + "step": 1294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040734730660915375, + "timestamp": "2025-10-01 03:22:35.039461", + "step": 1295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:35.071256", + "step": 1295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02875741757452488, + "timestamp": "2025-10-01 03:22:35.094624", + "step": 1296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.126072", + "step": 1296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02143634483218193, + "timestamp": "2025-10-01 03:22:35.128163", + "step": 1297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:35.162781", + "step": 1297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012726819142699242, + "timestamp": "2025-10-01 03:22:35.164910", + "step": 1298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.200305", + "step": 1298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02654067613184452, + "timestamp": "2025-10-01 03:22:35.203442", + "step": 1299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:35.235297", + "step": 1299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027315856888890266, + "timestamp": "2025-10-01 03:22:35.258456", + "step": 1300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.292502", + "step": 1300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024185817688703537, + "timestamp": "2025-10-01 03:22:35.296567", + "step": 1301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.329464", + "step": 1301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03471171483397484, + "timestamp": "2025-10-01 03:22:35.331507", + "step": 1302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.361343", + "step": 1302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013062400743365288, + "timestamp": "2025-10-01 03:22:35.363216", + "step": 1303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:35.394269", + "step": 1303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015476690605282784, + "timestamp": "2025-10-01 03:22:35.417752", + "step": 1304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.450398", + "step": 1304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01629726216197014, + "timestamp": "2025-10-01 03:22:35.452583", + "step": 1305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.483350", + "step": 1305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002965002553537488, + "timestamp": "2025-10-01 03:22:35.485494", + "step": 1306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.515494", + "step": 1306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023044219240546227, + "timestamp": "2025-10-01 03:22:35.517764", + "step": 1307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:35.549655", + "step": 1307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024610161781311035, + "timestamp": "2025-10-01 03:22:35.573520", + "step": 1308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:35.604569", + "step": 1308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05056696757674217, + "timestamp": "2025-10-01 03:22:35.606823", + "step": 1309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.637429", + "step": 1309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05805463343858719, + "timestamp": "2025-10-01 03:22:35.639074", + "step": 1310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.669747", + "step": 1310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021922463551163673, + "timestamp": "2025-10-01 03:22:35.672362", + "step": 1311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:35.703903", + "step": 1311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014968113042414188, + "timestamp": "2025-10-01 03:22:35.727833", + "step": 1312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:35.761549", + "step": 1312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020951390266418457, + "timestamp": "2025-10-01 03:22:35.763564", + "step": 1313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.795594", + "step": 1313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.056527040898799896, + "timestamp": "2025-10-01 03:22:35.797858", + "step": 1314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.828078", + "step": 1314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033260416239500046, + "timestamp": "2025-10-01 03:22:35.830641", + "step": 1315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:35.862223", + "step": 1315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06796830147504807, + "timestamp": "2025-10-01 03:22:35.885659", + "step": 1316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.919419", + "step": 1316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001719654886983335, + "timestamp": "2025-10-01 03:22:35.921756", + "step": 1317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:35.952217", + "step": 1317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.053609322756528854, + "timestamp": "2025-10-01 03:22:35.953701", + "step": 1318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:35.984881", + "step": 1318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026025448460131884, + "timestamp": "2025-10-01 03:22:35.986803", + "step": 1319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.021735", + "step": 1319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07601017504930496, + "timestamp": "2025-10-01 03:22:36.045453", + "step": 1320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.076262", + "step": 1320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07948096096515656, + "timestamp": "2025-10-01 03:22:36.078078", + "step": 1321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.109092", + "step": 1321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027765600010752678, + "timestamp": "2025-10-01 03:22:36.111944", + "step": 1322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:36.145202", + "step": 1322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05680333450436592, + "timestamp": "2025-10-01 03:22:36.149295", + "step": 1323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.183005", + "step": 1323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052667099982500076, + "timestamp": "2025-10-01 03:22:36.206526", + "step": 1324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.238326", + "step": 1324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01599128544330597, + "timestamp": "2025-10-01 03:22:36.240919", + "step": 1325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.273007", + "step": 1325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03598534315824509, + "timestamp": "2025-10-01 03:22:36.275527", + "step": 1326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.307625", + "step": 1326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03676914796233177, + "timestamp": "2025-10-01 03:22:36.309932", + "step": 1327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:36.341334", + "step": 1327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038830555975437164, + "timestamp": "2025-10-01 03:22:36.366451", + "step": 1328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.398509", + "step": 1328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03525104746222496, + "timestamp": "2025-10-01 03:22:36.400846", + "step": 1329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.432281", + "step": 1329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028545614331960678, + "timestamp": "2025-10-01 03:22:36.434373", + "step": 1330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.465780", + "step": 1330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0542067289352417, + "timestamp": "2025-10-01 03:22:36.468096", + "step": 1331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.499406", + "step": 1331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04206712171435356, + "timestamp": "2025-10-01 03:22:36.523129", + "step": 1332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.553719", + "step": 1332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01217427384108305, + "timestamp": "2025-10-01 03:22:36.555729", + "step": 1333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:36.585468", + "step": 1333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03273894637823105, + "timestamp": "2025-10-01 03:22:36.587769", + "step": 1334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.618700", + "step": 1334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05500181391835213, + "timestamp": "2025-10-01 03:22:36.620881", + "step": 1335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.651791", + "step": 1335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028613192960619926, + "timestamp": "2025-10-01 03:22:36.675617", + "step": 1336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.708398", + "step": 1336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043318379670381546, + "timestamp": "2025-10-01 03:22:36.710643", + "step": 1337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:36.742810", + "step": 1337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06373867392539978, + "timestamp": "2025-10-01 03:22:36.745044", + "step": 1338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.778747", + "step": 1338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03607192263007164, + "timestamp": "2025-10-01 03:22:36.780792", + "step": 1339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:36.815534", + "step": 1339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06588114798069, + "timestamp": "2025-10-01 03:22:36.839596", + "step": 1340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.873598", + "step": 1340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04470011964440346, + "timestamp": "2025-10-01 03:22:36.875667", + "step": 1341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.909371", + "step": 1341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024116476997733116, + "timestamp": "2025-10-01 03:22:36.912054", + "step": 1342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:36.945863", + "step": 1342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032112590968608856, + "timestamp": "2025-10-01 03:22:36.948044", + "step": 1343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:36.979859", + "step": 1343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03350060060620308, + "timestamp": "2025-10-01 03:22:37.003669", + "step": 1344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.033944", + "step": 1344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014154133386909962, + "timestamp": "2025-10-01 03:22:37.036028", + "step": 1345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.065676", + "step": 1345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0512540340423584, + "timestamp": "2025-10-01 03:22:37.067639", + "step": 1346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.097811", + "step": 1346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027853118255734444, + "timestamp": "2025-10-01 03:22:37.099830", + "step": 1347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.130275", + "step": 1347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034965239465236664, + "timestamp": "2025-10-01 03:22:37.153753", + "step": 1348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.186614", + "step": 1348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022414792329072952, + "timestamp": "2025-10-01 03:22:37.188819", + "step": 1349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:37.219816", + "step": 1349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010126887820661068, + "timestamp": "2025-10-01 03:22:37.221815", + "step": 1350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.253017", + "step": 1350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04373135417699814, + "timestamp": "2025-10-01 03:22:37.255207", + "step": 1351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.285970", + "step": 1351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03360632061958313, + "timestamp": "2025-10-01 03:22:37.310684", + "step": 1352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:37.344327", + "step": 1352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04645494744181633, + "timestamp": "2025-10-01 03:22:37.350872", + "step": 1353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.385048", + "step": 1353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05881597846746445, + "timestamp": "2025-10-01 03:22:37.394157", + "step": 1354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.426651", + "step": 1354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03855180740356445, + "timestamp": "2025-10-01 03:22:37.429416", + "step": 1355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.460354", + "step": 1355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025764349848031998, + "timestamp": "2025-10-01 03:22:37.483988", + "step": 1356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:37.513911", + "step": 1356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04171048849821091, + "timestamp": "2025-10-01 03:22:37.516075", + "step": 1357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.545997", + "step": 1357, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.059660669416189194, + "timestamp": "2025-10-01 03:22:37.547938", + "step": 1358, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.577996", + "step": 1358, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03330303728580475, + "timestamp": "2025-10-01 03:22:37.579962", + "step": 1359, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:37.610261", + "step": 1359, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01565561443567276, + "timestamp": "2025-10-01 03:22:37.633868", + "step": 1360, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.663833", + "step": 1360, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043623823672533035, + "timestamp": "2025-10-01 03:22:37.666625", + "step": 1361, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:22:37.696861", + "step": 1361, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04028450697660446, + "timestamp": "2025-10-01 03:22:37.699458", + "step": 1362, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:37.729531", + "step": 1362, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04316737502813339, + "timestamp": "2025-10-01 03:22:37.732642", + "step": 1363, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:37.762687", + "step": 1363, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018166929483413696, + "timestamp": "2025-10-01 03:22:37.786352", + "step": 1364, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.816780", + "step": 1364, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026632120832800865, + "timestamp": "2025-10-01 03:22:37.820235", + "step": 1365, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.852424", + "step": 1365, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03125634416937828, + "timestamp": "2025-10-01 03:22:37.854659", + "step": 1366, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:37.884396", + "step": 1366, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03156059980392456, + "timestamp": "2025-10-01 03:22:37.886649", + "step": 1367, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:37.916370", + "step": 1367, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040580008178949356, + "timestamp": "2025-10-01 03:22:37.939848", + "step": 1368, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:22:40.082389", + "step": 1368, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2754921.737897749, + "timestamp": "2025-10-01 03:22:40.084663", + "step": 1368, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.114643", + "step": 1368, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0611400380730629, + "timestamp": "2025-10-01 03:22:40.116899", + "step": 1369, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.149252", + "step": 1369, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03842297941446304, + "timestamp": "2025-10-01 03:22:40.151365", + "step": 1370, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:40.182308", + "step": 1370, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02018883265554905, + "timestamp": "2025-10-01 03:22:40.184462", + "step": 1371, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.215370", + "step": 1371, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04728811979293823, + "timestamp": "2025-10-01 03:22:40.239396", + "step": 1372, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.271217", + "step": 1372, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04785343259572983, + "timestamp": "2025-10-01 03:22:40.273451", + "step": 1373, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.304080", + "step": 1373, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03520013764500618, + "timestamp": "2025-10-01 03:22:40.306807", + "step": 1374, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.339319", + "step": 1374, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020168231800198555, + "timestamp": "2025-10-01 03:22:40.341784", + "step": 1375, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.372467", + "step": 1375, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034403081983327866, + "timestamp": "2025-10-01 03:22:40.396721", + "step": 1376, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.427185", + "step": 1376, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04172354191541672, + "timestamp": "2025-10-01 03:22:40.429300", + "step": 1377, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.460362", + "step": 1377, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04928259924054146, + "timestamp": "2025-10-01 03:22:40.462884", + "step": 1378, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.496007", + "step": 1378, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041542764753103256, + "timestamp": "2025-10-01 03:22:40.498142", + "step": 1379, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:40.534221", + "step": 1379, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07669191807508469, + "timestamp": "2025-10-01 03:22:40.557995", + "step": 1380, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.588546", + "step": 1380, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055663857609033585, + "timestamp": "2025-10-01 03:22:40.590684", + "step": 1381, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.621606", + "step": 1381, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.056379616260528564, + "timestamp": "2025-10-01 03:22:40.623853", + "step": 1382, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.654577", + "step": 1382, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040043048560619354, + "timestamp": "2025-10-01 03:22:40.656791", + "step": 1383, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.686972", + "step": 1383, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019004421308636665, + "timestamp": "2025-10-01 03:22:40.710640", + "step": 1384, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.740755", + "step": 1384, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019521549344062805, + "timestamp": "2025-10-01 03:22:40.743475", + "step": 1385, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:40.777563", + "step": 1385, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019112320616841316, + "timestamp": "2025-10-01 03:22:40.779853", + "step": 1386, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.811236", + "step": 1386, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011841310188174248, + "timestamp": "2025-10-01 03:22:40.813436", + "step": 1387, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.843861", + "step": 1387, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03862259164452553, + "timestamp": "2025-10-01 03:22:40.867848", + "step": 1388, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.902888", + "step": 1388, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024829953908920288, + "timestamp": "2025-10-01 03:22:40.905200", + "step": 1389, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.935058", + "step": 1389, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036393385380506516, + "timestamp": "2025-10-01 03:22:40.937074", + "step": 1390, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:40.968231", + "step": 1390, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038207199424505234, + "timestamp": "2025-10-01 03:22:40.970444", + "step": 1391, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.000504", + "step": 1391, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025377580896019936, + "timestamp": "2025-10-01 03:22:41.029137", + "step": 1392, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.062245", + "step": 1392, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0313616581261158, + "timestamp": "2025-10-01 03:22:41.064474", + "step": 1393, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.094122", + "step": 1393, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011138908565044403, + "timestamp": "2025-10-01 03:22:41.096043", + "step": 1394, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.126152", + "step": 1394, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0104024987667799, + "timestamp": "2025-10-01 03:22:41.128351", + "step": 1395, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:41.159786", + "step": 1395, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02123299427330494, + "timestamp": "2025-10-01 03:22:41.183124", + "step": 1396, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.223563", + "step": 1396, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0077341594733297825, + "timestamp": "2025-10-01 03:22:41.225990", + "step": 1397, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.260644", + "step": 1397, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01736031472682953, + "timestamp": "2025-10-01 03:22:41.262759", + "step": 1398, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.293043", + "step": 1398, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029094083234667778, + "timestamp": "2025-10-01 03:22:41.296251", + "step": 1399, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.327876", + "step": 1399, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007952584885060787, + "timestamp": "2025-10-01 03:22:41.352842", + "step": 1400, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.384740", + "step": 1400, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025149453431367874, + "timestamp": "2025-10-01 03:22:41.387302", + "step": 1401, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:41.418156", + "step": 1401, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037520166486501694, + "timestamp": "2025-10-01 03:22:41.420157", + "step": 1402, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.449839", + "step": 1402, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02506840229034424, + "timestamp": "2025-10-01 03:22:41.452193", + "step": 1403, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.482072", + "step": 1403, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029291251674294472, + "timestamp": "2025-10-01 03:22:41.505418", + "step": 1404, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.535948", + "step": 1404, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06152721866965294, + "timestamp": "2025-10-01 03:22:41.538021", + "step": 1405, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:41.568534", + "step": 1405, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018037568777799606, + "timestamp": "2025-10-01 03:22:41.570557", + "step": 1406, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.601682", + "step": 1406, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05515247583389282, + "timestamp": "2025-10-01 03:22:41.603888", + "step": 1407, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.634720", + "step": 1407, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019824307411909103, + "timestamp": "2025-10-01 03:22:41.658286", + "step": 1408, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:41.689650", + "step": 1408, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06738387048244476, + "timestamp": "2025-10-01 03:22:41.691651", + "step": 1409, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.726524", + "step": 1409, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004336661193519831, + "timestamp": "2025-10-01 03:22:41.728573", + "step": 1410, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.759506", + "step": 1410, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0642029196023941, + "timestamp": "2025-10-01 03:22:41.761575", + "step": 1411, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.791434", + "step": 1411, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034154150635004044, + "timestamp": "2025-10-01 03:22:41.815048", + "step": 1412, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:41.845093", + "step": 1412, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026395803317427635, + "timestamp": "2025-10-01 03:22:41.847083", + "step": 1413, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.878740", + "step": 1413, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052340563386678696, + "timestamp": "2025-10-01 03:22:41.880840", + "step": 1414, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.910957", + "step": 1414, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03697682172060013, + "timestamp": "2025-10-01 03:22:41.913129", + "step": 1415, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.943322", + "step": 1415, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030514759942889214, + "timestamp": "2025-10-01 03:22:41.967029", + "step": 1416, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:41.997944", + "step": 1416, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01958225853741169, + "timestamp": "2025-10-01 03:22:42.000110", + "step": 1417, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.030827", + "step": 1417, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015889586880803108, + "timestamp": "2025-10-01 03:22:42.033020", + "step": 1418, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.063730", + "step": 1418, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017701027914881706, + "timestamp": "2025-10-01 03:22:42.065753", + "step": 1419, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.095539", + "step": 1419, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009380257688462734, + "timestamp": "2025-10-01 03:22:42.119130", + "step": 1420, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.150420", + "step": 1420, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018180835992097855, + "timestamp": "2025-10-01 03:22:42.152680", + "step": 1421, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.183510", + "step": 1421, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02079252526164055, + "timestamp": "2025-10-01 03:22:42.186141", + "step": 1422, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:42.217897", + "step": 1422, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06306279450654984, + "timestamp": "2025-10-01 03:22:42.220003", + "step": 1423, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.250223", + "step": 1423, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028275834396481514, + "timestamp": "2025-10-01 03:22:42.273834", + "step": 1424, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.304039", + "step": 1424, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006551084574311972, + "timestamp": "2025-10-01 03:22:42.308279", + "step": 1425, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.340660", + "step": 1425, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06562679260969162, + "timestamp": "2025-10-01 03:22:42.343296", + "step": 1426, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.375076", + "step": 1426, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06468041986227036, + "timestamp": "2025-10-01 03:22:42.377955", + "step": 1427, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.408934", + "step": 1427, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03389763459563255, + "timestamp": "2025-10-01 03:22:42.433959", + "step": 1428, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.464445", + "step": 1428, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0729769840836525, + "timestamp": "2025-10-01 03:22:42.466637", + "step": 1429, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:42.498102", + "step": 1429, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06193125620484352, + "timestamp": "2025-10-01 03:22:42.500394", + "step": 1430, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.531925", + "step": 1430, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004986986052244902, + "timestamp": "2025-10-01 03:22:42.534036", + "step": 1431, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.565343", + "step": 1431, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005627994891256094, + "timestamp": "2025-10-01 03:22:42.589332", + "step": 1432, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.620354", + "step": 1432, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05702696368098259, + "timestamp": "2025-10-01 03:22:42.622665", + "step": 1433, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.653357", + "step": 1433, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04092434048652649, + "timestamp": "2025-10-01 03:22:42.655788", + "step": 1434, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.686898", + "step": 1434, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030553817749023438, + "timestamp": "2025-10-01 03:22:42.689499", + "step": 1435, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.721607", + "step": 1435, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05185190588235855, + "timestamp": "2025-10-01 03:22:42.747045", + "step": 1436, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.780980", + "step": 1436, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016239037737250328, + "timestamp": "2025-10-01 03:22:42.783495", + "step": 1437, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.816427", + "step": 1437, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001608603517524898, + "timestamp": "2025-10-01 03:22:42.819005", + "step": 1438, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.850930", + "step": 1438, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018547579646110535, + "timestamp": "2025-10-01 03:22:42.853918", + "step": 1439, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.886213", + "step": 1439, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04087230935692787, + "timestamp": "2025-10-01 03:22:42.910746", + "step": 1440, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:42.942385", + "step": 1440, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0098916906863451, + "timestamp": "2025-10-01 03:22:42.945189", + "step": 1441, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:42.976313", + "step": 1441, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06354203820228577, + "timestamp": "2025-10-01 03:22:42.979060", + "step": 1442, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.009828", + "step": 1442, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.053811587393283844, + "timestamp": "2025-10-01 03:22:43.012290", + "step": 1443, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.044154", + "step": 1443, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01857728697359562, + "timestamp": "2025-10-01 03:22:43.069289", + "step": 1444, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.100105", + "step": 1444, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.092307910323143, + "timestamp": "2025-10-01 03:22:43.103119", + "step": 1445, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:43.135819", + "step": 1445, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016925133764743805, + "timestamp": "2025-10-01 03:22:43.138334", + "step": 1446, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:43.169307", + "step": 1446, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025267088785767555, + "timestamp": "2025-10-01 03:22:43.171859", + "step": 1447, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.203673", + "step": 1447, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05325522646307945, + "timestamp": "2025-10-01 03:22:43.228050", + "step": 1448, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.261591", + "step": 1448, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05364271253347397, + "timestamp": "2025-10-01 03:22:43.265355", + "step": 1449, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.297679", + "step": 1449, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020566772669553757, + "timestamp": "2025-10-01 03:22:43.300625", + "step": 1450, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.333036", + "step": 1450, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025892380625009537, + "timestamp": "2025-10-01 03:22:43.337535", + "step": 1451, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.371727", + "step": 1451, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034851464442908764, + "timestamp": "2025-10-01 03:22:43.398170", + "step": 1452, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.432231", + "step": 1452, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006704401690512896, + "timestamp": "2025-10-01 03:22:43.435476", + "step": 1453, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.468010", + "step": 1453, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02431972697377205, + "timestamp": "2025-10-01 03:22:43.472409", + "step": 1454, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.505632", + "step": 1454, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02304028905928135, + "timestamp": "2025-10-01 03:22:43.509089", + "step": 1455, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:43.540979", + "step": 1455, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028936993330717087, + "timestamp": "2025-10-01 03:22:43.565155", + "step": 1456, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.597224", + "step": 1456, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040272291749715805, + "timestamp": "2025-10-01 03:22:43.600000", + "step": 1457, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.630152", + "step": 1457, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04614953324198723, + "timestamp": "2025-10-01 03:22:43.632929", + "step": 1458, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.664442", + "step": 1458, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0353073813021183, + "timestamp": "2025-10-01 03:22:43.666966", + "step": 1459, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.699287", + "step": 1459, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027041004970669746, + "timestamp": "2025-10-01 03:22:43.723755", + "step": 1460, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:43.755698", + "step": 1460, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01464853435754776, + "timestamp": "2025-10-01 03:22:43.758425", + "step": 1461, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.793417", + "step": 1461, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04992125183343887, + "timestamp": "2025-10-01 03:22:43.795794", + "step": 1462, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.826942", + "step": 1462, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0179618988186121, + "timestamp": "2025-10-01 03:22:43.829609", + "step": 1463, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.860688", + "step": 1463, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013048936612904072, + "timestamp": "2025-10-01 03:22:43.884812", + "step": 1464, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.915919", + "step": 1464, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012816355563700199, + "timestamp": "2025-10-01 03:22:43.919477", + "step": 1465, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:43.952870", + "step": 1465, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07465708255767822, + "timestamp": "2025-10-01 03:22:43.956242", + "step": 1466, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:43.987790", + "step": 1466, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03602162003517151, + "timestamp": "2025-10-01 03:22:43.990106", + "step": 1467, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.021042", + "step": 1467, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014003467746078968, + "timestamp": "2025-10-01 03:22:44.045253", + "step": 1468, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.076781", + "step": 1468, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01417274959385395, + "timestamp": "2025-10-01 03:22:44.079522", + "step": 1469, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.110401", + "step": 1469, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021150678396224976, + "timestamp": "2025-10-01 03:22:44.113177", + "step": 1470, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.145218", + "step": 1470, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029539791867136955, + "timestamp": "2025-10-01 03:22:44.147736", + "step": 1471, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:44.178589", + "step": 1471, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06272576749324799, + "timestamp": "2025-10-01 03:22:44.202576", + "step": 1472, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.233189", + "step": 1472, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032443247735500336, + "timestamp": "2025-10-01 03:22:44.237353", + "step": 1473, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.268237", + "step": 1473, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027066918089985847, + "timestamp": "2025-10-01 03:22:44.271086", + "step": 1474, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.302246", + "step": 1474, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037964414805173874, + "timestamp": "2025-10-01 03:22:44.304915", + "step": 1475, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:44.335997", + "step": 1475, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010303040035068989, + "timestamp": "2025-10-01 03:22:44.361661", + "step": 1476, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.393405", + "step": 1476, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012918449938297272, + "timestamp": "2025-10-01 03:22:44.397273", + "step": 1477, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:44.429331", + "step": 1477, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03116931952536106, + "timestamp": "2025-10-01 03:22:44.432864", + "step": 1478, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.465303", + "step": 1478, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0404604934155941, + "timestamp": "2025-10-01 03:22:44.469894", + "step": 1479, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.502269", + "step": 1479, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03032468445599079, + "timestamp": "2025-10-01 03:22:44.526377", + "step": 1480, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.556808", + "step": 1480, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023544101044535637, + "timestamp": "2025-10-01 03:22:44.558891", + "step": 1481, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.589504", + "step": 1481, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014256699942052364, + "timestamp": "2025-10-01 03:22:44.591640", + "step": 1482, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.622674", + "step": 1482, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009589208289980888, + "timestamp": "2025-10-01 03:22:44.624897", + "step": 1483, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:44.656074", + "step": 1483, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03881119564175606, + "timestamp": "2025-10-01 03:22:44.679969", + "step": 1484, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.716485", + "step": 1484, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04559272155165672, + "timestamp": "2025-10-01 03:22:44.718739", + "step": 1485, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.751521", + "step": 1485, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023974508047103882, + "timestamp": "2025-10-01 03:22:44.753654", + "step": 1486, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.784456", + "step": 1486, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015476835891604424, + "timestamp": "2025-10-01 03:22:44.786860", + "step": 1487, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:44.818451", + "step": 1487, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019553249701857567, + "timestamp": "2025-10-01 03:22:44.842016", + "step": 1488, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.872400", + "step": 1488, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03825373575091362, + "timestamp": "2025-10-01 03:22:44.874724", + "step": 1489, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.904754", + "step": 1489, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04481479898095131, + "timestamp": "2025-10-01 03:22:44.906976", + "step": 1490, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:44.937389", + "step": 1490, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07270359992980957, + "timestamp": "2025-10-01 03:22:44.939845", + "step": 1491, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:44.973885", + "step": 1491, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040402818471193314, + "timestamp": "2025-10-01 03:22:44.997548", + "step": 1492, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:45.029552", + "step": 1492, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02478523552417755, + "timestamp": "2025-10-01 03:22:45.031524", + "step": 1493, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:45.062571", + "step": 1493, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021775351837277412, + "timestamp": "2025-10-01 03:22:45.064628", + "step": 1494, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:45.095860", + "step": 1494, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023913297802209854, + "timestamp": "2025-10-01 03:22:45.098259", + "step": 1495, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:45.128245", + "step": 1495, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028492525219917297, + "timestamp": "2025-10-01 03:22:45.151923", + "step": 1496, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:45.181766", + "step": 1496, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028526397421956062, + "timestamp": "2025-10-01 03:22:45.184135", + "step": 1497, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:45.214159", + "step": 1497, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03471958637237549, + "timestamp": "2025-10-01 03:22:45.216328", + "step": 1498, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:45.246414", + "step": 1498, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031066035851836205, + "timestamp": "2025-10-01 03:22:45.248358", + "step": 1499, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:45.278243", + "step": 1499, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015005991794168949, + "timestamp": "2025-10-01 03:22:45.301739", + "step": 1500, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 1500", + "timestamp": "2025-10-01 03:22:50.618354", + "step": 1500, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.658920", + "step": 1500, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03020867146551609, + "timestamp": "2025-10-01 03:22:50.661095", + "step": 1501, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.695327", + "step": 1501, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03019590489566326, + "timestamp": "2025-10-01 03:22:50.698546", + "step": 1502, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.729641", + "step": 1502, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016285674646496773, + "timestamp": "2025-10-01 03:22:50.731591", + "step": 1503, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.762931", + "step": 1503, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.048125576227903366, + "timestamp": "2025-10-01 03:22:50.787783", + "step": 1504, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.820959", + "step": 1504, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03289828822016716, + "timestamp": "2025-10-01 03:22:50.823583", + "step": 1505, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.856431", + "step": 1505, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022784264758229256, + "timestamp": "2025-10-01 03:22:50.860983", + "step": 1506, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.893153", + "step": 1506, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07217764854431152, + "timestamp": "2025-10-01 03:22:50.897209", + "step": 1507, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.931632", + "step": 1507, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016004299744963646, + "timestamp": "2025-10-01 03:22:50.956010", + "step": 1508, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:50.988822", + "step": 1508, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05344203859567642, + "timestamp": "2025-10-01 03:22:50.991809", + "step": 1509, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.024692", + "step": 1509, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05643419548869133, + "timestamp": "2025-10-01 03:22:51.028915", + "step": 1510, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:51.062839", + "step": 1510, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.058863330632448196, + "timestamp": "2025-10-01 03:22:51.065278", + "step": 1511, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.097621", + "step": 1511, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04892553761601448, + "timestamp": "2025-10-01 03:22:51.123737", + "step": 1512, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.156911", + "step": 1512, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04827268794178963, + "timestamp": "2025-10-01 03:22:51.159630", + "step": 1513, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.191759", + "step": 1513, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028900301083922386, + "timestamp": "2025-10-01 03:22:51.194531", + "step": 1514, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.230389", + "step": 1514, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024857057258486748, + "timestamp": "2025-10-01 03:22:51.233054", + "step": 1515, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.264465", + "step": 1515, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006532709114253521, + "timestamp": "2025-10-01 03:22:51.288800", + "step": 1516, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.323558", + "step": 1516, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011952429078519344, + "timestamp": "2025-10-01 03:22:51.327306", + "step": 1517, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.363631", + "step": 1517, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019160494208335876, + "timestamp": "2025-10-01 03:22:51.366381", + "step": 1518, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.401587", + "step": 1518, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02009795606136322, + "timestamp": "2025-10-01 03:22:51.404808", + "step": 1519, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:51.438272", + "step": 1519, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03794002905488014, + "timestamp": "2025-10-01 03:22:51.464847", + "step": 1520, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:22:53.656973", + "step": 1520, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2907777.46771628, + "timestamp": "2025-10-01 03:22:53.659201", + "step": 1520, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:53.688450", + "step": 1520, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0287968497723341, + "timestamp": "2025-10-01 03:22:53.690464", + "step": 1521, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:53.720529", + "step": 1521, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022203009575605392, + "timestamp": "2025-10-01 03:22:53.722577", + "step": 1522, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:53.753414", + "step": 1522, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026439892128109932, + "timestamp": "2025-10-01 03:22:53.755337", + "step": 1523, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:53.784944", + "step": 1523, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039714641869068146, + "timestamp": "2025-10-01 03:22:53.808754", + "step": 1524, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:53.839936", + "step": 1524, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02708612196147442, + "timestamp": "2025-10-01 03:22:53.841945", + "step": 1525, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:53.872314", + "step": 1525, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07684605568647385, + "timestamp": "2025-10-01 03:22:53.874646", + "step": 1526, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:53.905525", + "step": 1526, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03170578554272652, + "timestamp": "2025-10-01 03:22:53.908034", + "step": 1527, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:53.939417", + "step": 1527, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07692592591047287, + "timestamp": "2025-10-01 03:22:53.963475", + "step": 1528, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:53.997034", + "step": 1528, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04142181947827339, + "timestamp": "2025-10-01 03:22:53.999161", + "step": 1529, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.029264", + "step": 1529, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009803930297493935, + "timestamp": "2025-10-01 03:22:54.031363", + "step": 1530, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.061452", + "step": 1530, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04753532260656357, + "timestamp": "2025-10-01 03:22:54.063770", + "step": 1531, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:54.095115", + "step": 1531, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049093496054410934, + "timestamp": "2025-10-01 03:22:54.118631", + "step": 1532, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.148761", + "step": 1532, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020062992349267006, + "timestamp": "2025-10-01 03:22:54.151208", + "step": 1533, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.182708", + "step": 1533, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032627273350954056, + "timestamp": "2025-10-01 03:22:54.184806", + "step": 1534, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.214997", + "step": 1534, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04134919494390488, + "timestamp": "2025-10-01 03:22:54.217403", + "step": 1535, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.247679", + "step": 1535, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033880431205034256, + "timestamp": "2025-10-01 03:22:54.271269", + "step": 1536, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.301408", + "step": 1536, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02768082357943058, + "timestamp": "2025-10-01 03:22:54.303418", + "step": 1537, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.333364", + "step": 1537, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017309319227933884, + "timestamp": "2025-10-01 03:22:54.335437", + "step": 1538, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.366876", + "step": 1538, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05270988121628761, + "timestamp": "2025-10-01 03:22:54.370216", + "step": 1539, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:22:54.400765", + "step": 1539, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03912487253546715, + "timestamp": "2025-10-01 03:22:54.424358", + "step": 1540, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:54.457106", + "step": 1540, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04787612333893776, + "timestamp": "2025-10-01 03:22:54.459584", + "step": 1541, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.491204", + "step": 1541, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025021547451615334, + "timestamp": "2025-10-01 03:22:54.493825", + "step": 1542, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.524720", + "step": 1542, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004313284065574408, + "timestamp": "2025-10-01 03:22:54.528125", + "step": 1543, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.559903", + "step": 1543, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03157897666096687, + "timestamp": "2025-10-01 03:22:54.583677", + "step": 1544, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.614501", + "step": 1544, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038849521428346634, + "timestamp": "2025-10-01 03:22:54.616427", + "step": 1545, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.648023", + "step": 1545, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033415041863918304, + "timestamp": "2025-10-01 03:22:54.650656", + "step": 1546, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.680558", + "step": 1546, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05603041499853134, + "timestamp": "2025-10-01 03:22:54.682761", + "step": 1547, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.712628", + "step": 1547, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019943905994296074, + "timestamp": "2025-10-01 03:22:54.736215", + "step": 1548, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.767161", + "step": 1548, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03230297937989235, + "timestamp": "2025-10-01 03:22:54.781973", + "step": 1549, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:22:54.821498", + "step": 1549, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045984767377376556, + "timestamp": "2025-10-01 03:22:54.823743", + "step": 1550, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.854333", + "step": 1550, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018139218911528587, + "timestamp": "2025-10-01 03:22:54.856352", + "step": 1551, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.887548", + "step": 1551, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030153576284646988, + "timestamp": "2025-10-01 03:22:54.911134", + "step": 1552, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:54.941539", + "step": 1552, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0348358117043972, + "timestamp": "2025-10-01 03:22:54.943828", + "step": 1553, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:54.973954", + "step": 1553, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024069083854556084, + "timestamp": "2025-10-01 03:22:54.975909", + "step": 1554, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.007039", + "step": 1554, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049790848046541214, + "timestamp": "2025-10-01 03:22:55.009313", + "step": 1555, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.039976", + "step": 1555, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.053462181240320206, + "timestamp": "2025-10-01 03:22:55.063654", + "step": 1556, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:55.094307", + "step": 1556, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04032471776008606, + "timestamp": "2025-10-01 03:22:55.096630", + "step": 1557, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.127396", + "step": 1557, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031790830194950104, + "timestamp": "2025-10-01 03:22:55.129433", + "step": 1558, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.159954", + "step": 1558, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034965746104717255, + "timestamp": "2025-10-01 03:22:55.161865", + "step": 1559, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.191666", + "step": 1559, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02220350317656994, + "timestamp": "2025-10-01 03:22:55.217361", + "step": 1560, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:55.248373", + "step": 1560, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014945468865334988, + "timestamp": "2025-10-01 03:22:55.250328", + "step": 1561, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.280671", + "step": 1561, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020286796614527702, + "timestamp": "2025-10-01 03:22:55.282808", + "step": 1562, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.312993", + "step": 1562, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027349531650543213, + "timestamp": "2025-10-01 03:22:55.315055", + "step": 1563, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-10-01 03:22:55.349709", + "step": 1563, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04677576199173927, + "timestamp": "2025-10-01 03:22:55.373442", + "step": 1564, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:55.412322", + "step": 1564, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05075197294354439, + "timestamp": "2025-10-01 03:22:55.414430", + "step": 1565, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.444774", + "step": 1565, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029452115297317505, + "timestamp": "2025-10-01 03:22:55.446884", + "step": 1566, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.481200", + "step": 1566, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0165871549397707, + "timestamp": "2025-10-01 03:22:55.482898", + "step": 1567, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.513881", + "step": 1567, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04124506562948227, + "timestamp": "2025-10-01 03:22:55.537594", + "step": 1568, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.568082", + "step": 1568, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052491288632154465, + "timestamp": "2025-10-01 03:22:55.570051", + "step": 1569, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.600214", + "step": 1569, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0182043369859457, + "timestamp": "2025-10-01 03:22:55.602245", + "step": 1570, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.633247", + "step": 1570, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04385269060730934, + "timestamp": "2025-10-01 03:22:55.635524", + "step": 1571, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.666571", + "step": 1571, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013915793038904667, + "timestamp": "2025-10-01 03:22:55.690502", + "step": 1572, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:55.722384", + "step": 1572, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07137653231620789, + "timestamp": "2025-10-01 03:22:55.724287", + "step": 1573, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:55.755300", + "step": 1573, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020984111353754997, + "timestamp": "2025-10-01 03:22:55.757635", + "step": 1574, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.788366", + "step": 1574, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04700655862689018, + "timestamp": "2025-10-01 03:22:55.790306", + "step": 1575, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.820573", + "step": 1575, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017637085169553757, + "timestamp": "2025-10-01 03:22:55.844190", + "step": 1576, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.874963", + "step": 1576, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045443546026945114, + "timestamp": "2025-10-01 03:22:55.876742", + "step": 1577, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:55.907130", + "step": 1577, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03145222365856171, + "timestamp": "2025-10-01 03:22:55.909031", + "step": 1578, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:55.939079", + "step": 1578, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03239935263991356, + "timestamp": "2025-10-01 03:22:55.940896", + "step": 1579, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:55.970462", + "step": 1579, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04658617824316025, + "timestamp": "2025-10-01 03:22:55.995909", + "step": 1580, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.025782", + "step": 1580, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04006450995802879, + "timestamp": "2025-10-01 03:22:56.027423", + "step": 1581, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.057324", + "step": 1581, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.058593299239873886, + "timestamp": "2025-10-01 03:22:56.059192", + "step": 1582, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.093981", + "step": 1582, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008574661798775196, + "timestamp": "2025-10-01 03:22:56.096111", + "step": 1583, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.125393", + "step": 1583, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05844671279191971, + "timestamp": "2025-10-01 03:22:56.149165", + "step": 1584, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.179434", + "step": 1584, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038033097982406616, + "timestamp": "2025-10-01 03:22:56.181767", + "step": 1585, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:56.212955", + "step": 1585, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05003660172224045, + "timestamp": "2025-10-01 03:22:56.215220", + "step": 1586, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.246182", + "step": 1586, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06359200924634933, + "timestamp": "2025-10-01 03:22:56.248128", + "step": 1587, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:56.279022", + "step": 1587, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025316162034869194, + "timestamp": "2025-10-01 03:22:56.302370", + "step": 1588, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.332515", + "step": 1588, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03129833564162254, + "timestamp": "2025-10-01 03:22:56.334570", + "step": 1589, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.375722", + "step": 1589, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034001316875219345, + "timestamp": "2025-10-01 03:22:56.377220", + "step": 1590, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.407379", + "step": 1590, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04675035923719406, + "timestamp": "2025-10-01 03:22:56.409500", + "step": 1591, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.440410", + "step": 1591, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04448438435792923, + "timestamp": "2025-10-01 03:22:56.463678", + "step": 1592, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.494940", + "step": 1592, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035143397748470306, + "timestamp": "2025-10-01 03:22:56.497062", + "step": 1593, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.530696", + "step": 1593, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03081706538796425, + "timestamp": "2025-10-01 03:22:56.532712", + "step": 1594, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:56.563114", + "step": 1594, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04821237176656723, + "timestamp": "2025-10-01 03:22:56.564912", + "step": 1595, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.596213", + "step": 1595, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034842923283576965, + "timestamp": "2025-10-01 03:22:56.620050", + "step": 1596, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.650534", + "step": 1596, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031762510538101196, + "timestamp": "2025-10-01 03:22:56.652416", + "step": 1597, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.692747", + "step": 1597, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017483193427324295, + "timestamp": "2025-10-01 03:22:56.694827", + "step": 1598, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:56.725186", + "step": 1598, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03198308125138283, + "timestamp": "2025-10-01 03:22:56.727407", + "step": 1599, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.757885", + "step": 1599, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016522549092769623, + "timestamp": "2025-10-01 03:22:56.781525", + "step": 1600, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:56.811970", + "step": 1600, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036774810403585434, + "timestamp": "2025-10-01 03:22:56.813908", + "step": 1601, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.843979", + "step": 1601, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031194055452942848, + "timestamp": "2025-10-01 03:22:56.846045", + "step": 1602, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.876494", + "step": 1602, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022388484328985214, + "timestamp": "2025-10-01 03:22:56.878160", + "step": 1603, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.908575", + "step": 1603, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05473915860056877, + "timestamp": "2025-10-01 03:22:56.931840", + "step": 1604, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.962014", + "step": 1604, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03973185271024704, + "timestamp": "2025-10-01 03:22:56.963839", + "step": 1605, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:56.994212", + "step": 1605, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009785040281713009, + "timestamp": "2025-10-01 03:22:56.996083", + "step": 1606, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.026485", + "step": 1606, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04849451780319214, + "timestamp": "2025-10-01 03:22:57.028563", + "step": 1607, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.058481", + "step": 1607, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005391591228544712, + "timestamp": "2025-10-01 03:22:57.081924", + "step": 1608, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.112691", + "step": 1608, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021112503483891487, + "timestamp": "2025-10-01 03:22:57.114497", + "step": 1609, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:57.145307", + "step": 1609, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026741351932287216, + "timestamp": "2025-10-01 03:22:57.147518", + "step": 1610, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.177567", + "step": 1610, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041044365614652634, + "timestamp": "2025-10-01 03:22:57.179800", + "step": 1611, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:57.209527", + "step": 1611, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036396097391843796, + "timestamp": "2025-10-01 03:22:57.233043", + "step": 1612, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.263947", + "step": 1612, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014710577204823494, + "timestamp": "2025-10-01 03:22:57.265969", + "step": 1613, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.296351", + "step": 1613, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043970171362161636, + "timestamp": "2025-10-01 03:22:57.298297", + "step": 1614, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.328250", + "step": 1614, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03264318406581879, + "timestamp": "2025-10-01 03:22:57.330310", + "step": 1615, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.360390", + "step": 1615, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028366491198539734, + "timestamp": "2025-10-01 03:22:57.383770", + "step": 1616, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.415907", + "step": 1616, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03861703351140022, + "timestamp": "2025-10-01 03:22:57.417794", + "step": 1617, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.447781", + "step": 1617, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0676463171839714, + "timestamp": "2025-10-01 03:22:57.449776", + "step": 1618, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.482342", + "step": 1618, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04988948255777359, + "timestamp": "2025-10-01 03:22:57.485028", + "step": 1619, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.516482", + "step": 1619, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023593556135892868, + "timestamp": "2025-10-01 03:22:57.540130", + "step": 1620, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.571089", + "step": 1620, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015566953457891941, + "timestamp": "2025-10-01 03:22:57.572909", + "step": 1621, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.605847", + "step": 1621, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016939325258135796, + "timestamp": "2025-10-01 03:22:57.608015", + "step": 1622, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:57.639096", + "step": 1622, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05464351177215576, + "timestamp": "2025-10-01 03:22:57.641354", + "step": 1623, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.671600", + "step": 1623, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.058743443340063095, + "timestamp": "2025-10-01 03:22:57.695372", + "step": 1624, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.727481", + "step": 1624, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017531823366880417, + "timestamp": "2025-10-01 03:22:57.729561", + "step": 1625, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.760532", + "step": 1625, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03597472980618477, + "timestamp": "2025-10-01 03:22:57.762405", + "step": 1626, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.792985", + "step": 1626, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04052743688225746, + "timestamp": "2025-10-01 03:22:57.795255", + "step": 1627, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:57.826652", + "step": 1627, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04711382836103439, + "timestamp": "2025-10-01 03:22:57.850346", + "step": 1628, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.880866", + "step": 1628, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037611644715070724, + "timestamp": "2025-10-01 03:22:57.891178", + "step": 1629, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.921014", + "step": 1629, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008787617087364197, + "timestamp": "2025-10-01 03:22:57.922796", + "step": 1630, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.953153", + "step": 1630, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01174173317849636, + "timestamp": "2025-10-01 03:22:57.955475", + "step": 1631, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:57.985748", + "step": 1631, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03221336007118225, + "timestamp": "2025-10-01 03:22:58.009320", + "step": 1632, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.039105", + "step": 1632, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011356067843735218, + "timestamp": "2025-10-01 03:22:58.041111", + "step": 1633, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.081466", + "step": 1633, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03845294564962387, + "timestamp": "2025-10-01 03:22:58.083454", + "step": 1634, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:58.134933", + "step": 1634, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020526094362139702, + "timestamp": "2025-10-01 03:22:58.137161", + "step": 1635, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.167473", + "step": 1635, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016972502693533897, + "timestamp": "2025-10-01 03:22:58.190999", + "step": 1636, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.221989", + "step": 1636, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04222079738974571, + "timestamp": "2025-10-01 03:22:58.224159", + "step": 1637, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.256316", + "step": 1637, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03346877917647362, + "timestamp": "2025-10-01 03:22:58.259307", + "step": 1638, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.289398", + "step": 1638, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011588332243263721, + "timestamp": "2025-10-01 03:22:58.291309", + "step": 1639, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.322111", + "step": 1639, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01622639037668705, + "timestamp": "2025-10-01 03:22:58.347106", + "step": 1640, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.376922", + "step": 1640, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018987994641065598, + "timestamp": "2025-10-01 03:22:58.378808", + "step": 1641, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.408249", + "step": 1641, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0248205978423357, + "timestamp": "2025-10-01 03:22:58.410302", + "step": 1642, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.440925", + "step": 1642, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04954874515533447, + "timestamp": "2025-10-01 03:22:58.444009", + "step": 1643, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.475235", + "step": 1643, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037156734615564346, + "timestamp": "2025-10-01 03:22:58.499159", + "step": 1644, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.551292", + "step": 1644, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018544640392065048, + "timestamp": "2025-10-01 03:22:58.553858", + "step": 1645, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:58.616613", + "step": 1645, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01879114843904972, + "timestamp": "2025-10-01 03:22:58.618855", + "step": 1646, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.681734", + "step": 1646, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029126297682523727, + "timestamp": "2025-10-01 03:22:58.688847", + "step": 1647, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.746514", + "step": 1647, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.058656755834817886, + "timestamp": "2025-10-01 03:22:58.770270", + "step": 1648, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.833189", + "step": 1648, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022992881014943123, + "timestamp": "2025-10-01 03:22:58.835112", + "step": 1649, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.900194", + "step": 1649, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024786312133073807, + "timestamp": "2025-10-01 03:22:58.903553", + "step": 1650, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:58.967570", + "step": 1650, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027966441586613655, + "timestamp": "2025-10-01 03:22:58.969792", + "step": 1651, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.046034", + "step": 1651, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012475049123167992, + "timestamp": "2025-10-01 03:22:59.069691", + "step": 1652, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.100254", + "step": 1652, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06980372220277786, + "timestamp": "2025-10-01 03:22:59.102613", + "step": 1653, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:59.137694", + "step": 1653, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016910862177610397, + "timestamp": "2025-10-01 03:22:59.139878", + "step": 1654, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.170160", + "step": 1654, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02803618833422661, + "timestamp": "2025-10-01 03:22:59.172533", + "step": 1655, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.208143", + "step": 1655, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0392131507396698, + "timestamp": "2025-10-01 03:22:59.231820", + "step": 1656, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.265791", + "step": 1656, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03132575377821922, + "timestamp": "2025-10-01 03:22:59.268761", + "step": 1657, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.299615", + "step": 1657, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013887012377381325, + "timestamp": "2025-10-01 03:22:59.303066", + "step": 1658, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:59.335306", + "step": 1658, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03398435562849045, + "timestamp": "2025-10-01 03:22:59.337312", + "step": 1659, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.368161", + "step": 1659, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043612927198410034, + "timestamp": "2025-10-01 03:22:59.391768", + "step": 1660, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:59.426061", + "step": 1660, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01823958195745945, + "timestamp": "2025-10-01 03:22:59.428335", + "step": 1661, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.459613", + "step": 1661, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05323272570967674, + "timestamp": "2025-10-01 03:22:59.462011", + "step": 1662, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.492537", + "step": 1662, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022972390055656433, + "timestamp": "2025-10-01 03:22:59.498073", + "step": 1663, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.529454", + "step": 1663, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021354686468839645, + "timestamp": "2025-10-01 03:22:59.553327", + "step": 1664, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.587073", + "step": 1664, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030967893544584513, + "timestamp": "2025-10-01 03:22:59.589071", + "step": 1665, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:22:59.621699", + "step": 1665, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017288299277424812, + "timestamp": "2025-10-01 03:22:59.624063", + "step": 1666, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.656102", + "step": 1666, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024238094687461853, + "timestamp": "2025-10-01 03:22:59.658337", + "step": 1667, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:59.702829", + "step": 1667, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02714201807975769, + "timestamp": "2025-10-01 03:22:59.726597", + "step": 1668, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:22:59.756517", + "step": 1668, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005459683947265148, + "timestamp": "2025-10-01 03:22:59.758377", + "step": 1669, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.789082", + "step": 1669, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05083584785461426, + "timestamp": "2025-10-01 03:22:59.791263", + "step": 1670, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.821172", + "step": 1670, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005478673614561558, + "timestamp": "2025-10-01 03:22:59.824546", + "step": 1671, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:22:59.856757", + "step": 1671, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04104964807629585, + "timestamp": "2025-10-01 03:22:59.880669", + "step": 1672, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:23:02.053618", + "step": 1672, + "epoch": 1 + }, + { + "type": "pplx", + "content": 3077077.4782778244, + "timestamp": "2025-10-01 03:23:02.058624", + "step": 1672, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:02.089211", + "step": 1672, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04063335806131363, + "timestamp": "2025-10-01 03:23:02.091334", + "step": 1673, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.122371", + "step": 1673, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036854714155197144, + "timestamp": "2025-10-01 03:23:02.124223", + "step": 1674, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:02.154734", + "step": 1674, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010753879323601723, + "timestamp": "2025-10-01 03:23:02.157355", + "step": 1675, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.187893", + "step": 1675, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027788234874606133, + "timestamp": "2025-10-01 03:23:02.211885", + "step": 1676, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.241864", + "step": 1676, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01872709020972252, + "timestamp": "2025-10-01 03:23:02.243860", + "step": 1677, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.274747", + "step": 1677, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03563961386680603, + "timestamp": "2025-10-01 03:23:02.276963", + "step": 1678, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.308157", + "step": 1678, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03009362705051899, + "timestamp": "2025-10-01 03:23:02.310307", + "step": 1679, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:02.341503", + "step": 1679, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03152616694569588, + "timestamp": "2025-10-01 03:23:02.366816", + "step": 1680, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.396825", + "step": 1680, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022868534550070763, + "timestamp": "2025-10-01 03:23:02.399021", + "step": 1681, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:02.431196", + "step": 1681, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02574176713824272, + "timestamp": "2025-10-01 03:23:02.433409", + "step": 1682, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.463548", + "step": 1682, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014888682402670383, + "timestamp": "2025-10-01 03:23:02.465615", + "step": 1683, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.497949", + "step": 1683, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008038986474275589, + "timestamp": "2025-10-01 03:23:02.522998", + "step": 1684, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.555592", + "step": 1684, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025325540453195572, + "timestamp": "2025-10-01 03:23:02.557861", + "step": 1685, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.588031", + "step": 1685, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030537398997694254, + "timestamp": "2025-10-01 03:23:02.590375", + "step": 1686, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.620923", + "step": 1686, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01872585341334343, + "timestamp": "2025-10-01 03:23:02.623090", + "step": 1687, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.655315", + "step": 1687, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041651155799627304, + "timestamp": "2025-10-01 03:23:02.679014", + "step": 1688, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:02.712193", + "step": 1688, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022234786301851273, + "timestamp": "2025-10-01 03:23:02.714244", + "step": 1689, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:02.744846", + "step": 1689, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027052221819758415, + "timestamp": "2025-10-01 03:23:02.747126", + "step": 1690, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.777526", + "step": 1690, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033329159021377563, + "timestamp": "2025-10-01 03:23:02.779761", + "step": 1691, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.809701", + "step": 1691, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06446987390518188, + "timestamp": "2025-10-01 03:23:02.833481", + "step": 1692, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:02.863994", + "step": 1692, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025812264531850815, + "timestamp": "2025-10-01 03:23:02.866136", + "step": 1693, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.896337", + "step": 1693, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00608137296512723, + "timestamp": "2025-10-01 03:23:02.898434", + "step": 1694, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:02.929513", + "step": 1694, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04317405819892883, + "timestamp": "2025-10-01 03:23:02.931891", + "step": 1695, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:02.962608", + "step": 1695, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007819943130016327, + "timestamp": "2025-10-01 03:23:02.986387", + "step": 1696, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:03.016780", + "step": 1696, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023509616032242775, + "timestamp": "2025-10-01 03:23:03.018675", + "step": 1697, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.049878", + "step": 1697, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03156420961022377, + "timestamp": "2025-10-01 03:23:03.052049", + "step": 1698, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.083331", + "step": 1698, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032706353813409805, + "timestamp": "2025-10-01 03:23:03.085442", + "step": 1699, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.115895", + "step": 1699, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.048418980091810226, + "timestamp": "2025-10-01 03:23:03.139425", + "step": 1700, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.180072", + "step": 1700, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039208102971315384, + "timestamp": "2025-10-01 03:23:03.181972", + "step": 1701, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.212130", + "step": 1701, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0365779884159565, + "timestamp": "2025-10-01 03:23:03.214469", + "step": 1702, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.244209", + "step": 1702, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06408097594976425, + "timestamp": "2025-10-01 03:23:03.246064", + "step": 1703, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.276044", + "step": 1703, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011725892312824726, + "timestamp": "2025-10-01 03:23:03.299531", + "step": 1704, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.329417", + "step": 1704, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04659127816557884, + "timestamp": "2025-10-01 03:23:03.330916", + "step": 1705, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.359954", + "step": 1705, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024066729471087456, + "timestamp": "2025-10-01 03:23:03.362471", + "step": 1706, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:03.392911", + "step": 1706, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05637466162443161, + "timestamp": "2025-10-01 03:23:03.395087", + "step": 1707, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.425580", + "step": 1707, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01891154982149601, + "timestamp": "2025-10-01 03:23:03.449391", + "step": 1708, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.479843", + "step": 1708, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008880244567990303, + "timestamp": "2025-10-01 03:23:03.482329", + "step": 1709, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:03.512621", + "step": 1709, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014784888364374638, + "timestamp": "2025-10-01 03:23:03.514622", + "step": 1710, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:03.544862", + "step": 1710, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06318690627813339, + "timestamp": "2025-10-01 03:23:03.546990", + "step": 1711, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:03.578409", + "step": 1711, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0028774035163223743, + "timestamp": "2025-10-01 03:23:03.601894", + "step": 1712, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:03.633243", + "step": 1712, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008432609029114246, + "timestamp": "2025-10-01 03:23:03.635242", + "step": 1713, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.668090", + "step": 1713, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.062195148319005966, + "timestamp": "2025-10-01 03:23:03.670232", + "step": 1714, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.700457", + "step": 1714, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04492512717843056, + "timestamp": "2025-10-01 03:23:03.702454", + "step": 1715, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.732849", + "step": 1715, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00908811204135418, + "timestamp": "2025-10-01 03:23:03.756452", + "step": 1716, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:03.786946", + "step": 1716, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024427874013781548, + "timestamp": "2025-10-01 03:23:03.789051", + "step": 1717, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.819555", + "step": 1717, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.061334945261478424, + "timestamp": "2025-10-01 03:23:03.821942", + "step": 1718, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.861787", + "step": 1718, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07312075793743134, + "timestamp": "2025-10-01 03:23:03.863764", + "step": 1719, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.894949", + "step": 1719, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.048514414578676224, + "timestamp": "2025-10-01 03:23:03.918666", + "step": 1720, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:03.949120", + "step": 1720, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021591631695628166, + "timestamp": "2025-10-01 03:23:03.951367", + "step": 1721, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:03.981890", + "step": 1721, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03242766484618187, + "timestamp": "2025-10-01 03:23:03.984095", + "step": 1722, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.014230", + "step": 1722, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047284871339797974, + "timestamp": "2025-10-01 03:23:04.016380", + "step": 1723, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.046561", + "step": 1723, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013731466606259346, + "timestamp": "2025-10-01 03:23:04.070332", + "step": 1724, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.100412", + "step": 1724, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010529102757573128, + "timestamp": "2025-10-01 03:23:04.102757", + "step": 1725, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:04.143032", + "step": 1725, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04431254416704178, + "timestamp": "2025-10-01 03:23:04.145169", + "step": 1726, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:04.176088", + "step": 1726, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01074214093387127, + "timestamp": "2025-10-01 03:23:04.178808", + "step": 1727, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.209425", + "step": 1727, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04322711005806923, + "timestamp": "2025-10-01 03:23:04.232748", + "step": 1728, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.270252", + "step": 1728, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018085239455103874, + "timestamp": "2025-10-01 03:23:04.272834", + "step": 1729, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.302686", + "step": 1729, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03554832935333252, + "timestamp": "2025-10-01 03:23:04.305292", + "step": 1730, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:04.335409", + "step": 1730, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01948263682425022, + "timestamp": "2025-10-01 03:23:04.337463", + "step": 1731, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.369130", + "step": 1731, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046531520783901215, + "timestamp": "2025-10-01 03:23:04.392904", + "step": 1732, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:04.424907", + "step": 1732, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014605559408664703, + "timestamp": "2025-10-01 03:23:04.426950", + "step": 1733, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:04.457677", + "step": 1733, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007533378899097443, + "timestamp": "2025-10-01 03:23:04.460638", + "step": 1734, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.492513", + "step": 1734, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03173818066716194, + "timestamp": "2025-10-01 03:23:04.494576", + "step": 1735, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.524944", + "step": 1735, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0535307452082634, + "timestamp": "2025-10-01 03:23:04.548708", + "step": 1736, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:04.580436", + "step": 1736, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031349506229162216, + "timestamp": "2025-10-01 03:23:04.582454", + "step": 1737, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.612319", + "step": 1737, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055063098669052124, + "timestamp": "2025-10-01 03:23:04.620019", + "step": 1738, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.651528", + "step": 1738, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05409417673945427, + "timestamp": "2025-10-01 03:23:04.654324", + "step": 1739, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.684543", + "step": 1739, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006161712575703859, + "timestamp": "2025-10-01 03:23:04.710022", + "step": 1740, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:04.740027", + "step": 1740, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042703114449977875, + "timestamp": "2025-10-01 03:23:04.742086", + "step": 1741, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.774384", + "step": 1741, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013602107763290405, + "timestamp": "2025-10-01 03:23:04.776427", + "step": 1742, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.807774", + "step": 1742, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01636463589966297, + "timestamp": "2025-10-01 03:23:04.811459", + "step": 1743, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.842833", + "step": 1743, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046282809227705, + "timestamp": "2025-10-01 03:23:04.866187", + "step": 1744, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.897519", + "step": 1744, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0054903156124055386, + "timestamp": "2025-10-01 03:23:04.899636", + "step": 1745, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.929885", + "step": 1745, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01956084743142128, + "timestamp": "2025-10-01 03:23:04.932360", + "step": 1746, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.962373", + "step": 1746, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04628860950469971, + "timestamp": "2025-10-01 03:23:04.964539", + "step": 1747, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:04.994957", + "step": 1747, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052517201751470566, + "timestamp": "2025-10-01 03:23:05.018918", + "step": 1748, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.048925", + "step": 1748, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009116454049944878, + "timestamp": "2025-10-01 03:23:05.051021", + "step": 1749, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.082022", + "step": 1749, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047176532447338104, + "timestamp": "2025-10-01 03:23:05.084138", + "step": 1750, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.115280", + "step": 1750, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021132931113243103, + "timestamp": "2025-10-01 03:23:05.117637", + "step": 1751, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:05.147922", + "step": 1751, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020636439323425293, + "timestamp": "2025-10-01 03:23:05.171570", + "step": 1752, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.202142", + "step": 1752, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02715233527123928, + "timestamp": "2025-10-01 03:23:05.204294", + "step": 1753, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.234587", + "step": 1753, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04120233282446861, + "timestamp": "2025-10-01 03:23:05.237058", + "step": 1754, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.268176", + "step": 1754, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034548766911029816, + "timestamp": "2025-10-01 03:23:05.270357", + "step": 1755, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.304634", + "step": 1755, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006471221800893545, + "timestamp": "2025-10-01 03:23:05.328153", + "step": 1756, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.358699", + "step": 1756, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045453835278749466, + "timestamp": "2025-10-01 03:23:05.361264", + "step": 1757, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.391869", + "step": 1757, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02365672029554844, + "timestamp": "2025-10-01 03:23:05.394116", + "step": 1758, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:05.427168", + "step": 1758, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04407942295074463, + "timestamp": "2025-10-01 03:23:05.430132", + "step": 1759, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.460237", + "step": 1759, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016028642654418945, + "timestamp": "2025-10-01 03:23:05.483722", + "step": 1760, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.528285", + "step": 1760, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02633192576467991, + "timestamp": "2025-10-01 03:23:05.530752", + "step": 1761, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.560707", + "step": 1761, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03998719900846481, + "timestamp": "2025-10-01 03:23:05.562856", + "step": 1762, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.594265", + "step": 1762, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016595445573329926, + "timestamp": "2025-10-01 03:23:05.596297", + "step": 1763, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.627222", + "step": 1763, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022987840697169304, + "timestamp": "2025-10-01 03:23:05.650864", + "step": 1764, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.683234", + "step": 1764, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019253330305218697, + "timestamp": "2025-10-01 03:23:05.685106", + "step": 1765, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:05.720967", + "step": 1765, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023156393319368362, + "timestamp": "2025-10-01 03:23:05.722858", + "step": 1766, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.753055", + "step": 1766, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022717395797371864, + "timestamp": "2025-10-01 03:23:05.755041", + "step": 1767, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.785040", + "step": 1767, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01716155745089054, + "timestamp": "2025-10-01 03:23:05.808941", + "step": 1768, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.839110", + "step": 1768, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029725447297096252, + "timestamp": "2025-10-01 03:23:05.841430", + "step": 1769, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.871992", + "step": 1769, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03067905828356743, + "timestamp": "2025-10-01 03:23:05.874097", + "step": 1770, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.904301", + "step": 1770, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030985713005065918, + "timestamp": "2025-10-01 03:23:05.906295", + "step": 1771, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.936930", + "step": 1771, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025770684704184532, + "timestamp": "2025-10-01 03:23:05.960377", + "step": 1772, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:05.991050", + "step": 1772, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04055789113044739, + "timestamp": "2025-10-01 03:23:06.005316", + "step": 1773, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.036269", + "step": 1773, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0419662781059742, + "timestamp": "2025-10-01 03:23:06.044548", + "step": 1774, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.078911", + "step": 1774, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02665659599006176, + "timestamp": "2025-10-01 03:23:06.089760", + "step": 1775, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:06.122932", + "step": 1775, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017311032861471176, + "timestamp": "2025-10-01 03:23:06.146782", + "step": 1776, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.182517", + "step": 1776, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015052932314574718, + "timestamp": "2025-10-01 03:23:06.186262", + "step": 1777, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.224509", + "step": 1777, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01757754199206829, + "timestamp": "2025-10-01 03:23:06.227735", + "step": 1778, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.258615", + "step": 1778, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04767727851867676, + "timestamp": "2025-10-01 03:23:06.271603", + "step": 1779, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.314600", + "step": 1779, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.053343575447797775, + "timestamp": "2025-10-01 03:23:06.338295", + "step": 1780, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.368990", + "step": 1780, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008720478974282742, + "timestamp": "2025-10-01 03:23:06.371024", + "step": 1781, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:06.402546", + "step": 1781, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027352076023817062, + "timestamp": "2025-10-01 03:23:06.405039", + "step": 1782, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.436112", + "step": 1782, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034097395837306976, + "timestamp": "2025-10-01 03:23:06.438580", + "step": 1783, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.468614", + "step": 1783, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029568472877144814, + "timestamp": "2025-10-01 03:23:06.492427", + "step": 1784, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:06.522444", + "step": 1784, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00685260770842433, + "timestamp": "2025-10-01 03:23:06.524765", + "step": 1785, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:06.555249", + "step": 1785, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03409920632839203, + "timestamp": "2025-10-01 03:23:06.557263", + "step": 1786, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.587698", + "step": 1786, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004956855904310942, + "timestamp": "2025-10-01 03:23:06.590562", + "step": 1787, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.622806", + "step": 1787, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0077865347266197205, + "timestamp": "2025-10-01 03:23:06.646538", + "step": 1788, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.677591", + "step": 1788, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03323165327310562, + "timestamp": "2025-10-01 03:23:06.679753", + "step": 1789, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:06.710852", + "step": 1789, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02297314815223217, + "timestamp": "2025-10-01 03:23:06.712745", + "step": 1790, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.743135", + "step": 1790, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014177458360791206, + "timestamp": "2025-10-01 03:23:06.745251", + "step": 1791, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:06.776527", + "step": 1791, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012578285299241543, + "timestamp": "2025-10-01 03:23:06.800031", + "step": 1792, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.832700", + "step": 1792, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0344545841217041, + "timestamp": "2025-10-01 03:23:06.834939", + "step": 1793, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.865799", + "step": 1793, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02411268837749958, + "timestamp": "2025-10-01 03:23:06.868193", + "step": 1794, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.899733", + "step": 1794, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012195351533591747, + "timestamp": "2025-10-01 03:23:06.901917", + "step": 1795, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.932607", + "step": 1795, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04033414646983147, + "timestamp": "2025-10-01 03:23:06.956518", + "step": 1796, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:06.986951", + "step": 1796, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045427072793245316, + "timestamp": "2025-10-01 03:23:06.989107", + "step": 1797, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.019247", + "step": 1797, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01938226819038391, + "timestamp": "2025-10-01 03:23:07.021691", + "step": 1798, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:07.051736", + "step": 1798, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038044098764657974, + "timestamp": "2025-10-01 03:23:07.053730", + "step": 1799, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.083989", + "step": 1799, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01782723143696785, + "timestamp": "2025-10-01 03:23:07.107522", + "step": 1800, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.139140", + "step": 1800, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02703261747956276, + "timestamp": "2025-10-01 03:23:07.141399", + "step": 1801, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.179919", + "step": 1801, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005780319683253765, + "timestamp": "2025-10-01 03:23:07.181878", + "step": 1802, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.212225", + "step": 1802, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01281543355435133, + "timestamp": "2025-10-01 03:23:07.214193", + "step": 1803, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:07.245142", + "step": 1803, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01555387768894434, + "timestamp": "2025-10-01 03:23:07.269009", + "step": 1804, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:07.299662", + "step": 1804, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02510373480618, + "timestamp": "2025-10-01 03:23:07.301874", + "step": 1805, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.332703", + "step": 1805, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02600158378481865, + "timestamp": "2025-10-01 03:23:07.334741", + "step": 1806, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.365038", + "step": 1806, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006069051567465067, + "timestamp": "2025-10-01 03:23:07.367401", + "step": 1807, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.397696", + "step": 1807, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09269454330205917, + "timestamp": "2025-10-01 03:23:07.421174", + "step": 1808, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.451439", + "step": 1808, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03771915286779404, + "timestamp": "2025-10-01 03:23:07.453623", + "step": 1809, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.483886", + "step": 1809, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008594858460128307, + "timestamp": "2025-10-01 03:23:07.486178", + "step": 1810, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.517157", + "step": 1810, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04463183507323265, + "timestamp": "2025-10-01 03:23:07.519293", + "step": 1811, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.549880", + "step": 1811, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034610819071531296, + "timestamp": "2025-10-01 03:23:07.573411", + "step": 1812, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.603912", + "step": 1812, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015149049460887909, + "timestamp": "2025-10-01 03:23:07.606659", + "step": 1813, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.638114", + "step": 1813, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08113700896501541, + "timestamp": "2025-10-01 03:23:07.640251", + "step": 1814, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.671405", + "step": 1814, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.11413990706205368, + "timestamp": "2025-10-01 03:23:07.673534", + "step": 1815, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.705224", + "step": 1815, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01256456971168518, + "timestamp": "2025-10-01 03:23:07.728876", + "step": 1816, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.760105", + "step": 1816, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02249438874423504, + "timestamp": "2025-10-01 03:23:07.762208", + "step": 1817, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.792725", + "step": 1817, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015160753391683102, + "timestamp": "2025-10-01 03:23:07.795016", + "step": 1818, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:07.827423", + "step": 1818, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009806470014154911, + "timestamp": "2025-10-01 03:23:07.829748", + "step": 1819, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:07.859765", + "step": 1819, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026516374200582504, + "timestamp": "2025-10-01 03:23:07.883157", + "step": 1820, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.913626", + "step": 1820, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05197069048881531, + "timestamp": "2025-10-01 03:23:07.915682", + "step": 1821, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.947049", + "step": 1821, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015434383414685726, + "timestamp": "2025-10-01 03:23:07.949213", + "step": 1822, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:07.980861", + "step": 1822, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004216439556330442, + "timestamp": "2025-10-01 03:23:07.982752", + "step": 1823, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:08.013278", + "step": 1823, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021105051040649414, + "timestamp": "2025-10-01 03:23:08.037068", + "step": 1824, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:23:10.784919", + "step": 1824, + "epoch": 1 + }, + { + "type": "pplx", + "content": 3108775.8562042513, + "timestamp": "2025-10-01 03:23:10.790759", + "step": 1824, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:10.825100", + "step": 1824, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04505351185798645, + "timestamp": "2025-10-01 03:23:10.829518", + "step": 1825, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:10.865497", + "step": 1825, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015932640060782433, + "timestamp": "2025-10-01 03:23:10.870425", + "step": 1826, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:10.911595", + "step": 1826, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01658882386982441, + "timestamp": "2025-10-01 03:23:10.914781", + "step": 1827, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:10.950379", + "step": 1827, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0059707993641495705, + "timestamp": "2025-10-01 03:23:10.977100", + "step": 1828, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.012014", + "step": 1828, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0899844616651535, + "timestamp": "2025-10-01 03:23:11.016175", + "step": 1829, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.056912", + "step": 1829, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06394823640584946, + "timestamp": "2025-10-01 03:23:11.061216", + "step": 1830, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.111814", + "step": 1830, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021038414910435677, + "timestamp": "2025-10-01 03:23:11.117224", + "step": 1831, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.154924", + "step": 1831, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0341816283762455, + "timestamp": "2025-10-01 03:23:11.182214", + "step": 1832, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.227426", + "step": 1832, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024983931332826614, + "timestamp": "2025-10-01 03:23:11.233532", + "step": 1833, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.269263", + "step": 1833, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026816213503479958, + "timestamp": "2025-10-01 03:23:11.276956", + "step": 1834, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.322784", + "step": 1834, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012477025389671326, + "timestamp": "2025-10-01 03:23:11.330456", + "step": 1835, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.368932", + "step": 1835, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012629677541553974, + "timestamp": "2025-10-01 03:23:11.398104", + "step": 1836, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.439347", + "step": 1836, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02436967007815838, + "timestamp": "2025-10-01 03:23:11.445932", + "step": 1837, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.490233", + "step": 1837, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010997575707733631, + "timestamp": "2025-10-01 03:23:11.497751", + "step": 1838, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.543966", + "step": 1838, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07364016026258469, + "timestamp": "2025-10-01 03:23:11.549611", + "step": 1839, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:11.601296", + "step": 1839, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013225398026406765, + "timestamp": "2025-10-01 03:23:11.636961", + "step": 1840, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:11.721373", + "step": 1840, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03609352186322212, + "timestamp": "2025-10-01 03:23:11.729892", + "step": 1841, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:11.791258", + "step": 1841, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03090008907020092, + "timestamp": "2025-10-01 03:23:11.799277", + "step": 1842, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:11.854851", + "step": 1842, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005388534162193537, + "timestamp": "2025-10-01 03:23:11.862489", + "step": 1843, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:11.918322", + "step": 1843, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03794742003083229, + "timestamp": "2025-10-01 03:23:11.946401", + "step": 1844, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.004397", + "step": 1844, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06488853693008423, + "timestamp": "2025-10-01 03:23:12.015708", + "step": 1845, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.088374", + "step": 1845, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008490628562867641, + "timestamp": "2025-10-01 03:23:12.097588", + "step": 1846, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.163308", + "step": 1846, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04663119092583656, + "timestamp": "2025-10-01 03:23:12.171563", + "step": 1847, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.222387", + "step": 1847, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03777381405234337, + "timestamp": "2025-10-01 03:23:12.251215", + "step": 1848, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:12.310908", + "step": 1848, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03063628263771534, + "timestamp": "2025-10-01 03:23:12.318695", + "step": 1849, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:12.376998", + "step": 1849, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05681147053837776, + "timestamp": "2025-10-01 03:23:12.384142", + "step": 1850, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.455536", + "step": 1850, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043976105749607086, + "timestamp": "2025-10-01 03:23:12.463013", + "step": 1851, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.511926", + "step": 1851, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03480374813079834, + "timestamp": "2025-10-01 03:23:12.540635", + "step": 1852, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.626831", + "step": 1852, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015294007956981659, + "timestamp": "2025-10-01 03:23:12.631274", + "step": 1853, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.700417", + "step": 1853, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.050582271069288254, + "timestamp": "2025-10-01 03:23:12.710638", + "step": 1854, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.775611", + "step": 1854, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03225139528512955, + "timestamp": "2025-10-01 03:23:12.784093", + "step": 1855, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.844688", + "step": 1855, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04026796668767929, + "timestamp": "2025-10-01 03:23:12.876145", + "step": 1856, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.920165", + "step": 1856, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019761864095926285, + "timestamp": "2025-10-01 03:23:12.928262", + "step": 1857, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:12.966041", + "step": 1857, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009301009587943554, + "timestamp": "2025-10-01 03:23:12.972537", + "step": 1858, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.019354", + "step": 1858, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036473363637924194, + "timestamp": "2025-10-01 03:23:13.024428", + "step": 1859, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.065056", + "step": 1859, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008913928642868996, + "timestamp": "2025-10-01 03:23:13.094409", + "step": 1860, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.144471", + "step": 1860, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05764279142022133, + "timestamp": "2025-10-01 03:23:13.152079", + "step": 1861, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.200357", + "step": 1861, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014066440984606743, + "timestamp": "2025-10-01 03:23:13.206044", + "step": 1862, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.243744", + "step": 1862, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07103769481182098, + "timestamp": "2025-10-01 03:23:13.253727", + "step": 1863, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.295102", + "step": 1863, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03015742637217045, + "timestamp": "2025-10-01 03:23:13.320713", + "step": 1864, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.359548", + "step": 1864, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04952927306294441, + "timestamp": "2025-10-01 03:23:13.365420", + "step": 1865, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.402761", + "step": 1865, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03459000214934349, + "timestamp": "2025-10-01 03:23:13.410161", + "step": 1866, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:13.447906", + "step": 1866, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011350290849804878, + "timestamp": "2025-10-01 03:23:13.454232", + "step": 1867, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.491275", + "step": 1867, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020454224199056625, + "timestamp": "2025-10-01 03:23:13.518887", + "step": 1868, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.556963", + "step": 1868, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022910675033926964, + "timestamp": "2025-10-01 03:23:13.561785", + "step": 1869, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.607325", + "step": 1869, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03570224717259407, + "timestamp": "2025-10-01 03:23:13.613741", + "step": 1870, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.654829", + "step": 1870, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014956941828131676, + "timestamp": "2025-10-01 03:23:13.660972", + "step": 1871, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:13.711931", + "step": 1871, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0248782429844141, + "timestamp": "2025-10-01 03:23:13.745169", + "step": 1872, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:13.784296", + "step": 1872, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046405937522649765, + "timestamp": "2025-10-01 03:23:13.791662", + "step": 1873, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:13.840719", + "step": 1873, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024458179250359535, + "timestamp": "2025-10-01 03:23:13.847715", + "step": 1874, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:13.884762", + "step": 1874, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01898362673819065, + "timestamp": "2025-10-01 03:23:13.891353", + "step": 1875, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:13.929573", + "step": 1875, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01895214430987835, + "timestamp": "2025-10-01 03:23:13.957650", + "step": 1876, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.001958", + "step": 1876, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013152094557881355, + "timestamp": "2025-10-01 03:23:14.008212", + "step": 1877, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.049964", + "step": 1877, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04193142056465149, + "timestamp": "2025-10-01 03:23:14.056112", + "step": 1878, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:14.097222", + "step": 1878, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01897621341049671, + "timestamp": "2025-10-01 03:23:14.103004", + "step": 1879, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.142870", + "step": 1879, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021704360842704773, + "timestamp": "2025-10-01 03:23:14.169657", + "step": 1880, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.216756", + "step": 1880, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016968155279755592, + "timestamp": "2025-10-01 03:23:14.221800", + "step": 1881, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.262030", + "step": 1881, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009566823951900005, + "timestamp": "2025-10-01 03:23:14.266578", + "step": 1882, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.308581", + "step": 1882, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024831965565681458, + "timestamp": "2025-10-01 03:23:14.315458", + "step": 1883, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.349899", + "step": 1883, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017259962856769562, + "timestamp": "2025-10-01 03:23:14.376400", + "step": 1884, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.411384", + "step": 1884, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030902935191988945, + "timestamp": "2025-10-01 03:23:14.417418", + "step": 1885, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.462643", + "step": 1885, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014288458041846752, + "timestamp": "2025-10-01 03:23:14.467988", + "step": 1886, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.502272", + "step": 1886, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040712300688028336, + "timestamp": "2025-10-01 03:23:14.507780", + "step": 1887, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:14.546846", + "step": 1887, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031409863382577896, + "timestamp": "2025-10-01 03:23:14.573448", + "step": 1888, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.618063", + "step": 1888, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052797649055719376, + "timestamp": "2025-10-01 03:23:14.622693", + "step": 1889, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.660739", + "step": 1889, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00784988608211279, + "timestamp": "2025-10-01 03:23:14.666241", + "step": 1890, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:14.706281", + "step": 1890, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03862238675355911, + "timestamp": "2025-10-01 03:23:14.711036", + "step": 1891, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.751967", + "step": 1891, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06386331468820572, + "timestamp": "2025-10-01 03:23:14.778371", + "step": 1892, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.818447", + "step": 1892, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0171692855656147, + "timestamp": "2025-10-01 03:23:14.824816", + "step": 1893, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.872348", + "step": 1893, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03791377693414688, + "timestamp": "2025-10-01 03:23:14.877176", + "step": 1894, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.914231", + "step": 1894, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011930077336728573, + "timestamp": "2025-10-01 03:23:14.920438", + "step": 1895, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:14.963261", + "step": 1895, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027899330481886864, + "timestamp": "2025-10-01 03:23:14.991787", + "step": 1896, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.031164", + "step": 1896, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021690206602215767, + "timestamp": "2025-10-01 03:23:15.040100", + "step": 1897, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.082122", + "step": 1897, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011868019588291645, + "timestamp": "2025-10-01 03:23:15.089349", + "step": 1898, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:15.133164", + "step": 1898, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04636990278959274, + "timestamp": "2025-10-01 03:23:15.141746", + "step": 1899, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.182107", + "step": 1899, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02797533944249153, + "timestamp": "2025-10-01 03:23:15.208055", + "step": 1900, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.257512", + "step": 1900, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042379230260849, + "timestamp": "2025-10-01 03:23:15.268962", + "step": 1901, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.313923", + "step": 1901, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04650892689824104, + "timestamp": "2025-10-01 03:23:15.323744", + "step": 1902, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.369097", + "step": 1902, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017495086416602135, + "timestamp": "2025-10-01 03:23:15.375527", + "step": 1903, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:15.412287", + "step": 1903, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011524445377290249, + "timestamp": "2025-10-01 03:23:15.440338", + "step": 1904, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.480533", + "step": 1904, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023249551653862, + "timestamp": "2025-10-01 03:23:15.485569", + "step": 1905, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:15.521626", + "step": 1905, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03280263766646385, + "timestamp": "2025-10-01 03:23:15.528171", + "step": 1906, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.564935", + "step": 1906, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007480777334421873, + "timestamp": "2025-10-01 03:23:15.569458", + "step": 1907, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.604707", + "step": 1907, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038221780210733414, + "timestamp": "2025-10-01 03:23:15.631373", + "step": 1908, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:15.671784", + "step": 1908, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05107191950082779, + "timestamp": "2025-10-01 03:23:15.677645", + "step": 1909, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:15.721698", + "step": 1909, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024847855791449547, + "timestamp": "2025-10-01 03:23:15.726518", + "step": 1910, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.761839", + "step": 1910, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026040086522698402, + "timestamp": "2025-10-01 03:23:15.767933", + "step": 1911, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.803270", + "step": 1911, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019203921779990196, + "timestamp": "2025-10-01 03:23:15.831201", + "step": 1912, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.881815", + "step": 1912, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014420105144381523, + "timestamp": "2025-10-01 03:23:15.892051", + "step": 1913, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:15.940542", + "step": 1913, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008012707345187664, + "timestamp": "2025-10-01 03:23:15.956673", + "step": 1914, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.009202", + "step": 1914, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014441883191466331, + "timestamp": "2025-10-01 03:23:16.020378", + "step": 1915, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.075541", + "step": 1915, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041333891451358795, + "timestamp": "2025-10-01 03:23:16.108422", + "step": 1916, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.169348", + "step": 1916, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012989791110157967, + "timestamp": "2025-10-01 03:23:16.174580", + "step": 1917, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:16.229868", + "step": 1917, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003559166332706809, + "timestamp": "2025-10-01 03:23:16.243603", + "step": 1918, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.290219", + "step": 1918, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021030839532613754, + "timestamp": "2025-10-01 03:23:16.301844", + "step": 1919, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.346872", + "step": 1919, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009792658500373363, + "timestamp": "2025-10-01 03:23:16.376889", + "step": 1920, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.422161", + "step": 1920, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06047448143362999, + "timestamp": "2025-10-01 03:23:16.426666", + "step": 1921, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:16.471892", + "step": 1921, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028402967378497124, + "timestamp": "2025-10-01 03:23:16.475732", + "step": 1922, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.511350", + "step": 1922, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07725050300359726, + "timestamp": "2025-10-01 03:23:16.520391", + "step": 1923, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.570941", + "step": 1923, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08100159466266632, + "timestamp": "2025-10-01 03:23:16.599505", + "step": 1924, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.634622", + "step": 1924, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0293588787317276, + "timestamp": "2025-10-01 03:23:16.638284", + "step": 1925, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.679676", + "step": 1925, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012878930196166039, + "timestamp": "2025-10-01 03:23:16.684934", + "step": 1926, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.724113", + "step": 1926, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09337181597948074, + "timestamp": "2025-10-01 03:23:16.730846", + "step": 1927, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.770627", + "step": 1927, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03842221945524216, + "timestamp": "2025-10-01 03:23:16.795849", + "step": 1928, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:16.836462", + "step": 1928, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05975392088294029, + "timestamp": "2025-10-01 03:23:16.841515", + "step": 1929, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.881932", + "step": 1929, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03814053162932396, + "timestamp": "2025-10-01 03:23:16.888518", + "step": 1930, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.935632", + "step": 1930, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04372198134660721, + "timestamp": "2025-10-01 03:23:16.942431", + "step": 1931, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:16.980475", + "step": 1931, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03432371839880943, + "timestamp": "2025-10-01 03:23:17.004426", + "step": 1932, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.038640", + "step": 1932, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009401918388903141, + "timestamp": "2025-10-01 03:23:17.043975", + "step": 1933, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.102255", + "step": 1933, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033208902925252914, + "timestamp": "2025-10-01 03:23:17.108064", + "step": 1934, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.148226", + "step": 1934, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.044682204723358154, + "timestamp": "2025-10-01 03:23:17.153645", + "step": 1935, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.193070", + "step": 1935, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09747815877199173, + "timestamp": "2025-10-01 03:23:17.218449", + "step": 1936, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.264206", + "step": 1936, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024243256077170372, + "timestamp": "2025-10-01 03:23:17.267582", + "step": 1937, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.310401", + "step": 1937, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01694002002477646, + "timestamp": "2025-10-01 03:23:17.315484", + "step": 1938, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.349911", + "step": 1938, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05943561717867851, + "timestamp": "2025-10-01 03:23:17.355533", + "step": 1939, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.392462", + "step": 1939, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047237228602170944, + "timestamp": "2025-10-01 03:23:17.416596", + "step": 1940, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:17.453100", + "step": 1940, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011688866652548313, + "timestamp": "2025-10-01 03:23:17.458737", + "step": 1941, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:17.501933", + "step": 1941, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05514037236571312, + "timestamp": "2025-10-01 03:23:17.505082", + "step": 1942, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.543806", + "step": 1942, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02804018370807171, + "timestamp": "2025-10-01 03:23:17.547131", + "step": 1943, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:17.584399", + "step": 1943, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018700430169701576, + "timestamp": "2025-10-01 03:23:17.612679", + "step": 1944, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:17.656548", + "step": 1944, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042534954845905304, + "timestamp": "2025-10-01 03:23:17.664920", + "step": 1945, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:17.704671", + "step": 1945, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026802020147442818, + "timestamp": "2025-10-01 03:23:17.715754", + "step": 1946, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.763766", + "step": 1946, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017596030607819557, + "timestamp": "2025-10-01 03:23:17.773853", + "step": 1947, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.815384", + "step": 1947, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.050656307488679886, + "timestamp": "2025-10-01 03:23:17.845917", + "step": 1948, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:17.886215", + "step": 1948, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014432032592594624, + "timestamp": "2025-10-01 03:23:17.892969", + "step": 1949, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:17.931965", + "step": 1949, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0193165335804224, + "timestamp": "2025-10-01 03:23:17.941510", + "step": 1950, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:17.991159", + "step": 1950, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05090802535414696, + "timestamp": "2025-10-01 03:23:18.003912", + "step": 1951, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:18.050084", + "step": 1951, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026249686256051064, + "timestamp": "2025-10-01 03:23:18.083028", + "step": 1952, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.132325", + "step": 1952, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019427957013249397, + "timestamp": "2025-10-01 03:23:18.144568", + "step": 1953, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.183253", + "step": 1953, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030810439959168434, + "timestamp": "2025-10-01 03:23:18.194090", + "step": 1954, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:18.248673", + "step": 1954, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051741208881139755, + "timestamp": "2025-10-01 03:23:18.260619", + "step": 1955, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.312894", + "step": 1955, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03157168999314308, + "timestamp": "2025-10-01 03:23:18.346892", + "step": 1956, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:18.406058", + "step": 1956, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03012295626103878, + "timestamp": "2025-10-01 03:23:18.418505", + "step": 1957, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.462025", + "step": 1957, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06961007416248322, + "timestamp": "2025-10-01 03:23:18.473305", + "step": 1958, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:18.517773", + "step": 1958, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018070174381136894, + "timestamp": "2025-10-01 03:23:18.528909", + "step": 1959, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.584260", + "step": 1959, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04150262102484703, + "timestamp": "2025-10-01 03:23:18.617201", + "step": 1960, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.656871", + "step": 1960, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0444687083363533, + "timestamp": "2025-10-01 03:23:18.668118", + "step": 1961, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.710043", + "step": 1961, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015788834542036057, + "timestamp": "2025-10-01 03:23:18.718828", + "step": 1962, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.772250", + "step": 1962, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027777841314673424, + "timestamp": "2025-10-01 03:23:18.781832", + "step": 1963, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:18.820404", + "step": 1963, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0309485774487257, + "timestamp": "2025-10-01 03:23:18.853285", + "step": 1964, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.896957", + "step": 1964, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033428214490413666, + "timestamp": "2025-10-01 03:23:18.913920", + "step": 1965, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:18.960374", + "step": 1965, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05430217459797859, + "timestamp": "2025-10-01 03:23:18.972131", + "step": 1966, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.017606", + "step": 1966, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02248579077422619, + "timestamp": "2025-10-01 03:23:19.028677", + "step": 1967, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.071069", + "step": 1967, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024171510711312294, + "timestamp": "2025-10-01 03:23:19.096353", + "step": 1968, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.131408", + "step": 1968, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03238046541810036, + "timestamp": "2025-10-01 03:23:19.134795", + "step": 1969, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.185146", + "step": 1969, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028779421001672745, + "timestamp": "2025-10-01 03:23:19.189321", + "step": 1970, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.254121", + "step": 1970, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019460974261164665, + "timestamp": "2025-10-01 03:23:19.268538", + "step": 1971, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.315373", + "step": 1971, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024861710146069527, + "timestamp": "2025-10-01 03:23:19.349052", + "step": 1972, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.407782", + "step": 1972, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049419987946748734, + "timestamp": "2025-10-01 03:23:19.413143", + "step": 1973, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.453701", + "step": 1973, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023134376853704453, + "timestamp": "2025-10-01 03:23:19.466669", + "step": 1974, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.532377", + "step": 1974, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043055348098278046, + "timestamp": "2025-10-01 03:23:19.545390", + "step": 1975, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:19.590985", + "step": 1975, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027013003826141357, + "timestamp": "2025-10-01 03:23:19.625864", + "step": 1976, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:23:22.899967", + "step": 1976, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2543899.9966546847, + "timestamp": "2025-10-01 03:23:22.908749", + "step": 1976, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:22.945870", + "step": 1976, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04654426500201225, + "timestamp": "2025-10-01 03:23:22.954909", + "step": 1977, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:22.996969", + "step": 1977, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018634900450706482, + "timestamp": "2025-10-01 03:23:23.005150", + "step": 1978, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.063608", + "step": 1978, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08827479183673859, + "timestamp": "2025-10-01 03:23:23.071173", + "step": 1979, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.126000", + "step": 1979, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004347535315901041, + "timestamp": "2025-10-01 03:23:23.155498", + "step": 1980, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:23.206893", + "step": 1980, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07737953215837479, + "timestamp": "2025-10-01 03:23:23.216061", + "step": 1981, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:23.260165", + "step": 1981, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049996308982372284, + "timestamp": "2025-10-01 03:23:23.269127", + "step": 1982, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.322756", + "step": 1982, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024510132148861885, + "timestamp": "2025-10-01 03:23:23.330787", + "step": 1983, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.371435", + "step": 1983, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024334823712706566, + "timestamp": "2025-10-01 03:23:23.401934", + "step": 1984, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.444322", + "step": 1984, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03451919183135033, + "timestamp": "2025-10-01 03:23:23.453821", + "step": 1985, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.509268", + "step": 1985, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043655358254909515, + "timestamp": "2025-10-01 03:23:23.512887", + "step": 1986, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.574567", + "step": 1986, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0282584261149168, + "timestamp": "2025-10-01 03:23:23.580449", + "step": 1987, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.619700", + "step": 1987, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02683725766837597, + "timestamp": "2025-10-01 03:23:23.651841", + "step": 1988, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.696402", + "step": 1988, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036644257605075836, + "timestamp": "2025-10-01 03:23:23.708099", + "step": 1989, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.746220", + "step": 1989, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03293076902627945, + "timestamp": "2025-10-01 03:23:23.751314", + "step": 1990, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:23.800062", + "step": 1990, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027137652039527893, + "timestamp": "2025-10-01 03:23:23.804330", + "step": 1991, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.840447", + "step": 1991, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010363970883190632, + "timestamp": "2025-10-01 03:23:23.872640", + "step": 1992, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.916671", + "step": 1992, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01954038254916668, + "timestamp": "2025-10-01 03:23:23.930514", + "step": 1993, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:23.979478", + "step": 1993, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028793832287192345, + "timestamp": "2025-10-01 03:23:23.989829", + "step": 1994, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:24.033750", + "step": 1994, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01013428159058094, + "timestamp": "2025-10-01 03:23:24.042581", + "step": 1995, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:24.087398", + "step": 1995, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01833929494023323, + "timestamp": "2025-10-01 03:23:24.116921", + "step": 1996, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:24.158872", + "step": 1996, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034816913306713104, + "timestamp": "2025-10-01 03:23:24.167239", + "step": 1997, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:24.208175", + "step": 1997, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014130090363323689, + "timestamp": "2025-10-01 03:23:24.218082", + "step": 1998, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:24.259594", + "step": 1998, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04431106522679329, + "timestamp": "2025-10-01 03:23:24.270116", + "step": 1999, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:24.314462", + "step": 1999, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032761186361312866, + "timestamp": "2025-10-01 03:23:24.353019", + "step": 2000, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 2000", + "timestamp": "2025-10-01 03:23:29.436104", + "step": 2000, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:29.478578", + "step": 2000, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033507563173770905, + "timestamp": "2025-10-01 03:23:29.486142", + "step": 2001, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:29.527398", + "step": 2001, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01809527352452278, + "timestamp": "2025-10-01 03:23:29.534477", + "step": 2002, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:29.575860", + "step": 2002, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03298764303326607, + "timestamp": "2025-10-01 03:23:29.584004", + "step": 2003, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:29.627046", + "step": 2003, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06738895922899246, + "timestamp": "2025-10-01 03:23:29.659326", + "step": 2004, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:29.702637", + "step": 2004, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03823332116007805, + "timestamp": "2025-10-01 03:23:29.714090", + "step": 2005, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:29.762385", + "step": 2005, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04590417072176933, + "timestamp": "2025-10-01 03:23:29.773992", + "step": 2006, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:29.818265", + "step": 2006, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031097671017050743, + "timestamp": "2025-10-01 03:23:29.828997", + "step": 2007, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:29.883434", + "step": 2007, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02288343571126461, + "timestamp": "2025-10-01 03:23:29.913257", + "step": 2008, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:29.952946", + "step": 2008, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01878470554947853, + "timestamp": "2025-10-01 03:23:29.962326", + "step": 2009, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:30.005113", + "step": 2009, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02306522987782955, + "timestamp": "2025-10-01 03:23:30.012344", + "step": 2010, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:30.051764", + "step": 2010, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04884868115186691, + "timestamp": "2025-10-01 03:23:30.054921", + "step": 2011, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.112004", + "step": 2011, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019420910626649857, + "timestamp": "2025-10-01 03:23:30.139434", + "step": 2012, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:30.179376", + "step": 2012, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02202313020825386, + "timestamp": "2025-10-01 03:23:30.182789", + "step": 2013, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:30.220630", + "step": 2013, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06839004158973694, + "timestamp": "2025-10-01 03:23:30.229445", + "step": 2014, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.293112", + "step": 2014, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005039529409259558, + "timestamp": "2025-10-01 03:23:30.306671", + "step": 2015, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.351214", + "step": 2015, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011465862393379211, + "timestamp": "2025-10-01 03:23:30.387028", + "step": 2016, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.435001", + "step": 2016, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04197064787149429, + "timestamp": "2025-10-01 03:23:30.444062", + "step": 2017, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.486452", + "step": 2017, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02228737808763981, + "timestamp": "2025-10-01 03:23:30.496347", + "step": 2018, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.540625", + "step": 2018, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02458062954246998, + "timestamp": "2025-10-01 03:23:30.551230", + "step": 2019, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.592901", + "step": 2019, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054691195487976074, + "timestamp": "2025-10-01 03:23:30.623559", + "step": 2020, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.669867", + "step": 2020, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029633903875947, + "timestamp": "2025-10-01 03:23:30.680888", + "step": 2021, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.723130", + "step": 2021, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041575219482183456, + "timestamp": "2025-10-01 03:23:30.731843", + "step": 2022, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.792301", + "step": 2022, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0427282378077507, + "timestamp": "2025-10-01 03:23:30.798180", + "step": 2023, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.839753", + "step": 2023, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09807615727186203, + "timestamp": "2025-10-01 03:23:30.868299", + "step": 2024, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.908339", + "step": 2024, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008421245031058788, + "timestamp": "2025-10-01 03:23:30.918255", + "step": 2025, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:30.964902", + "step": 2025, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004979191813617945, + "timestamp": "2025-10-01 03:23:30.973747", + "step": 2026, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.037874", + "step": 2026, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02183048240840435, + "timestamp": "2025-10-01 03:23:31.046451", + "step": 2027, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.089471", + "step": 2027, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04447273537516594, + "timestamp": "2025-10-01 03:23:31.122118", + "step": 2028, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.176432", + "step": 2028, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03088625892996788, + "timestamp": "2025-10-01 03:23:31.183571", + "step": 2029, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.234566", + "step": 2029, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015925809741020203, + "timestamp": "2025-10-01 03:23:31.252055", + "step": 2030, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.294506", + "step": 2030, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07503583282232285, + "timestamp": "2025-10-01 03:23:31.302397", + "step": 2031, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:31.346369", + "step": 2031, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01867450587451458, + "timestamp": "2025-10-01 03:23:31.375731", + "step": 2032, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.444157", + "step": 2032, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020219704136252403, + "timestamp": "2025-10-01 03:23:31.454200", + "step": 2033, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.496223", + "step": 2033, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03838988021016121, + "timestamp": "2025-10-01 03:23:31.503732", + "step": 2034, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.543873", + "step": 2034, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029351282864809036, + "timestamp": "2025-10-01 03:23:31.548751", + "step": 2035, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.589062", + "step": 2035, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029729291796684265, + "timestamp": "2025-10-01 03:23:31.618501", + "step": 2036, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.662457", + "step": 2036, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02078876458108425, + "timestamp": "2025-10-01 03:23:31.673926", + "step": 2037, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:31.716563", + "step": 2037, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015081949532032013, + "timestamp": "2025-10-01 03:23:31.722865", + "step": 2038, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.781085", + "step": 2038, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021035317331552505, + "timestamp": "2025-10-01 03:23:31.793869", + "step": 2039, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.840078", + "step": 2039, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009250514209270477, + "timestamp": "2025-10-01 03:23:31.866693", + "step": 2040, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:31.914193", + "step": 2040, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03491013124585152, + "timestamp": "2025-10-01 03:23:31.921561", + "step": 2041, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:31.974871", + "step": 2041, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038056474179029465, + "timestamp": "2025-10-01 03:23:31.985653", + "step": 2042, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.048504", + "step": 2042, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0358700267970562, + "timestamp": "2025-10-01 03:23:32.062097", + "step": 2043, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:32.119181", + "step": 2043, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007677328307181597, + "timestamp": "2025-10-01 03:23:32.151045", + "step": 2044, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.192524", + "step": 2044, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026209909468889236, + "timestamp": "2025-10-01 03:23:32.202230", + "step": 2045, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.258139", + "step": 2045, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029006438329815865, + "timestamp": "2025-10-01 03:23:32.271640", + "step": 2046, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.327885", + "step": 2046, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018013449385762215, + "timestamp": "2025-10-01 03:23:32.340176", + "step": 2047, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.381184", + "step": 2047, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019124235957860947, + "timestamp": "2025-10-01 03:23:32.415514", + "step": 2048, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.461228", + "step": 2048, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029310693964362144, + "timestamp": "2025-10-01 03:23:32.473722", + "step": 2049, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.521593", + "step": 2049, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05104665085673332, + "timestamp": "2025-10-01 03:23:32.532845", + "step": 2050, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.598504", + "step": 2050, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04479198530316353, + "timestamp": "2025-10-01 03:23:32.602630", + "step": 2051, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.639965", + "step": 2051, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0120932562276721, + "timestamp": "2025-10-01 03:23:32.671491", + "step": 2052, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:32.707391", + "step": 2052, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022698896005749702, + "timestamp": "2025-10-01 03:23:32.717135", + "step": 2053, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:32.762344", + "step": 2053, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04033631086349487, + "timestamp": "2025-10-01 03:23:32.771700", + "step": 2054, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:32.816357", + "step": 2054, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013599775731563568, + "timestamp": "2025-10-01 03:23:32.826072", + "step": 2055, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.868860", + "step": 2055, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033393122255802155, + "timestamp": "2025-10-01 03:23:32.897738", + "step": 2056, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:32.940183", + "step": 2056, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02597285620868206, + "timestamp": "2025-10-01 03:23:32.950678", + "step": 2057, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:32.993231", + "step": 2057, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026632709428668022, + "timestamp": "2025-10-01 03:23:33.004890", + "step": 2058, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:33.045594", + "step": 2058, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036935957614332438, + "timestamp": "2025-10-01 03:23:33.056771", + "step": 2059, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.105452", + "step": 2059, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0359211303293705, + "timestamp": "2025-10-01 03:23:33.139986", + "step": 2060, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:33.189751", + "step": 2060, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009612455032765865, + "timestamp": "2025-10-01 03:23:33.202337", + "step": 2061, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.242242", + "step": 2061, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027988532558083534, + "timestamp": "2025-10-01 03:23:33.248935", + "step": 2062, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:33.290745", + "step": 2062, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023026781156659126, + "timestamp": "2025-10-01 03:23:33.299924", + "step": 2063, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:33.339677", + "step": 2063, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04542103037238121, + "timestamp": "2025-10-01 03:23:33.370263", + "step": 2064, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.411854", + "step": 2064, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01254353579133749, + "timestamp": "2025-10-01 03:23:33.418001", + "step": 2065, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.458562", + "step": 2065, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0232020765542984, + "timestamp": "2025-10-01 03:23:33.469214", + "step": 2066, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.512362", + "step": 2066, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02318483591079712, + "timestamp": "2025-10-01 03:23:33.527908", + "step": 2067, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.582200", + "step": 2067, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02961287833750248, + "timestamp": "2025-10-01 03:23:33.617369", + "step": 2068, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.667658", + "step": 2068, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03141174837946892, + "timestamp": "2025-10-01 03:23:33.671216", + "step": 2069, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:33.727136", + "step": 2069, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019340334460139275, + "timestamp": "2025-10-01 03:23:33.731659", + "step": 2070, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.772854", + "step": 2070, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02013668417930603, + "timestamp": "2025-10-01 03:23:33.788201", + "step": 2071, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.832595", + "step": 2071, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042259376496076584, + "timestamp": "2025-10-01 03:23:33.869343", + "step": 2072, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.934016", + "step": 2072, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.056185297667980194, + "timestamp": "2025-10-01 03:23:33.946869", + "step": 2073, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:33.992167", + "step": 2073, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021214352920651436, + "timestamp": "2025-10-01 03:23:34.002995", + "step": 2074, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:34.053788", + "step": 2074, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010944187641143799, + "timestamp": "2025-10-01 03:23:34.064836", + "step": 2075, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.107901", + "step": 2075, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020021220669150352, + "timestamp": "2025-10-01 03:23:34.133674", + "step": 2076, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.177802", + "step": 2076, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019047677516937256, + "timestamp": "2025-10-01 03:23:34.186946", + "step": 2077, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.230636", + "step": 2077, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03485775366425514, + "timestamp": "2025-10-01 03:23:34.241916", + "step": 2078, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.283598", + "step": 2078, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018486912595108151, + "timestamp": "2025-10-01 03:23:34.293565", + "step": 2079, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:34.346747", + "step": 2079, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.063831627368927, + "timestamp": "2025-10-01 03:23:34.382247", + "step": 2080, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.426695", + "step": 2080, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0574987418949604, + "timestamp": "2025-10-01 03:23:34.434833", + "step": 2081, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.473190", + "step": 2081, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027335377410054207, + "timestamp": "2025-10-01 03:23:34.485511", + "step": 2082, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.530826", + "step": 2082, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029602419584989548, + "timestamp": "2025-10-01 03:23:34.544769", + "step": 2083, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.590389", + "step": 2083, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015660028904676437, + "timestamp": "2025-10-01 03:23:34.627793", + "step": 2084, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:34.673703", + "step": 2084, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015918085351586342, + "timestamp": "2025-10-01 03:23:34.684318", + "step": 2085, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.727398", + "step": 2085, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008618530817329884, + "timestamp": "2025-10-01 03:23:34.738101", + "step": 2086, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.781716", + "step": 2086, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04550859332084656, + "timestamp": "2025-10-01 03:23:34.792907", + "step": 2087, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:34.839107", + "step": 2087, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016250381246209145, + "timestamp": "2025-10-01 03:23:34.865579", + "step": 2088, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:34.904434", + "step": 2088, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02914227917790413, + "timestamp": "2025-10-01 03:23:34.920099", + "step": 2089, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:34.983253", + "step": 2089, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031465597450733185, + "timestamp": "2025-10-01 03:23:34.988434", + "step": 2090, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.025438", + "step": 2090, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028675192967057228, + "timestamp": "2025-10-01 03:23:35.038044", + "step": 2091, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.087345", + "step": 2091, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0187153872102499, + "timestamp": "2025-10-01 03:23:35.119260", + "step": 2092, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:35.163351", + "step": 2092, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026117147877812386, + "timestamp": "2025-10-01 03:23:35.173744", + "step": 2093, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:35.215977", + "step": 2093, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03809747472405434, + "timestamp": "2025-10-01 03:23:35.225482", + "step": 2094, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.267236", + "step": 2094, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029893839731812477, + "timestamp": "2025-10-01 03:23:35.277232", + "step": 2095, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.319575", + "step": 2095, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04177110642194748, + "timestamp": "2025-10-01 03:23:35.352792", + "step": 2096, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:35.401993", + "step": 2096, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011287281289696693, + "timestamp": "2025-10-01 03:23:35.413932", + "step": 2097, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.471098", + "step": 2097, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04693398252129555, + "timestamp": "2025-10-01 03:23:35.482833", + "step": 2098, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.524529", + "step": 2098, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025879738852381706, + "timestamp": "2025-10-01 03:23:35.535745", + "step": 2099, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.575967", + "step": 2099, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007430814206600189, + "timestamp": "2025-10-01 03:23:35.609193", + "step": 2100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.653904", + "step": 2100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012723417021334171, + "timestamp": "2025-10-01 03:23:35.666287", + "step": 2101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.716515", + "step": 2101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014151463285088539, + "timestamp": "2025-10-01 03:23:35.723737", + "step": 2102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:35.763669", + "step": 2102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05697261914610863, + "timestamp": "2025-10-01 03:23:35.773203", + "step": 2103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:35.816061", + "step": 2103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018576299771666527, + "timestamp": "2025-10-01 03:23:35.841865", + "step": 2104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.896071", + "step": 2104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026554467156529427, + "timestamp": "2025-10-01 03:23:35.904754", + "step": 2105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:35.947000", + "step": 2105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01851813867688179, + "timestamp": "2025-10-01 03:23:35.957262", + "step": 2106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.000039", + "step": 2106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05860428884625435, + "timestamp": "2025-10-01 03:23:36.008775", + "step": 2107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:36.050146", + "step": 2107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009077539667487144, + "timestamp": "2025-10-01 03:23:36.077433", + "step": 2108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.123246", + "step": 2108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04648857191205025, + "timestamp": "2025-10-01 03:23:36.130055", + "step": 2109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.172824", + "step": 2109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03910495713353157, + "timestamp": "2025-10-01 03:23:36.175989", + "step": 2110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.209995", + "step": 2110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026357071474194527, + "timestamp": "2025-10-01 03:23:36.222025", + "step": 2111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.264128", + "step": 2111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04573330655694008, + "timestamp": "2025-10-01 03:23:36.293884", + "step": 2112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:36.337705", + "step": 2112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010213499888777733, + "timestamp": "2025-10-01 03:23:36.349472", + "step": 2113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.390735", + "step": 2113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015627682209014893, + "timestamp": "2025-10-01 03:23:36.404189", + "step": 2114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.461969", + "step": 2114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02324027195572853, + "timestamp": "2025-10-01 03:23:36.472922", + "step": 2115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.544865", + "step": 2115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07198668271303177, + "timestamp": "2025-10-01 03:23:36.581025", + "step": 2116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.618864", + "step": 2116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012532820925116539, + "timestamp": "2025-10-01 03:23:36.629605", + "step": 2117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:36.691089", + "step": 2117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008163806982338428, + "timestamp": "2025-10-01 03:23:36.700784", + "step": 2118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.759515", + "step": 2118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04252147302031517, + "timestamp": "2025-10-01 03:23:36.767218", + "step": 2119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.810692", + "step": 2119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0500764474272728, + "timestamp": "2025-10-01 03:23:36.846136", + "step": 2120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:36.905314", + "step": 2120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04363352060317993, + "timestamp": "2025-10-01 03:23:36.917397", + "step": 2121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:36.959274", + "step": 2121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007748961914330721, + "timestamp": "2025-10-01 03:23:36.968857", + "step": 2122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:37.011953", + "step": 2122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051893915981054306, + "timestamp": "2025-10-01 03:23:37.021907", + "step": 2123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:37.081942", + "step": 2123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01287128683179617, + "timestamp": "2025-10-01 03:23:37.113539", + "step": 2124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:37.172383", + "step": 2124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011111781001091003, + "timestamp": "2025-10-01 03:23:37.181625", + "step": 2125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:37.231472", + "step": 2125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006884687580168247, + "timestamp": "2025-10-01 03:23:37.239118", + "step": 2126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:37.295604", + "step": 2126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009649877436459064, + "timestamp": "2025-10-01 03:23:37.303757", + "step": 2127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:37.347551", + "step": 2127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01480750273913145, + "timestamp": "2025-10-01 03:23:37.383097", + "step": 2128, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:23:40.801088", + "step": 2128, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2376399.9052320635, + "timestamp": "2025-10-01 03:23:40.809418", + "step": 2128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:40.846323", + "step": 2128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06393037736415863, + "timestamp": "2025-10-01 03:23:40.856863", + "step": 2129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:40.905032", + "step": 2129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010866785421967506, + "timestamp": "2025-10-01 03:23:40.913018", + "step": 2130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:40.946666", + "step": 2130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006882339250296354, + "timestamp": "2025-10-01 03:23:40.953786", + "step": 2131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:40.995417", + "step": 2131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03147028759121895, + "timestamp": "2025-10-01 03:23:41.025391", + "step": 2132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.065663", + "step": 2132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03285486251115799, + "timestamp": "2025-10-01 03:23:41.072034", + "step": 2133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:41.108395", + "step": 2133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012204012833535671, + "timestamp": "2025-10-01 03:23:41.114046", + "step": 2134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.149447", + "step": 2134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0476725734770298, + "timestamp": "2025-10-01 03:23:41.155607", + "step": 2135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:41.192132", + "step": 2135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01818242482841015, + "timestamp": "2025-10-01 03:23:41.222769", + "step": 2136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.261188", + "step": 2136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010472500696778297, + "timestamp": "2025-10-01 03:23:41.278967", + "step": 2137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.327942", + "step": 2137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023034444078803062, + "timestamp": "2025-10-01 03:23:41.347517", + "step": 2138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.409394", + "step": 2138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022841166704893112, + "timestamp": "2025-10-01 03:23:41.427834", + "step": 2139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.476581", + "step": 2139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055662769824266434, + "timestamp": "2025-10-01 03:23:41.509119", + "step": 2140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.550033", + "step": 2140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03843521326780319, + "timestamp": "2025-10-01 03:23:41.553743", + "step": 2141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.588675", + "step": 2141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017203526571393013, + "timestamp": "2025-10-01 03:23:41.601686", + "step": 2142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:41.643813", + "step": 2142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013864508830010891, + "timestamp": "2025-10-01 03:23:41.652418", + "step": 2143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.695314", + "step": 2143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014990674331784248, + "timestamp": "2025-10-01 03:23:41.728380", + "step": 2144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.770023", + "step": 2144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013279713690280914, + "timestamp": "2025-10-01 03:23:41.780205", + "step": 2145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.824061", + "step": 2145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021290356293320656, + "timestamp": "2025-10-01 03:23:41.837707", + "step": 2146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.884606", + "step": 2146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029215041548013687, + "timestamp": "2025-10-01 03:23:41.897035", + "step": 2147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:41.943855", + "step": 2147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019531745463609695, + "timestamp": "2025-10-01 03:23:41.982955", + "step": 2148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.034927", + "step": 2148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035807233303785324, + "timestamp": "2025-10-01 03:23:42.046903", + "step": 2149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.089069", + "step": 2149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02521306462585926, + "timestamp": "2025-10-01 03:23:42.098567", + "step": 2150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:42.141215", + "step": 2150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03835302218794823, + "timestamp": "2025-10-01 03:23:42.148595", + "step": 2151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.191401", + "step": 2151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026964539662003517, + "timestamp": "2025-10-01 03:23:42.225557", + "step": 2152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.268438", + "step": 2152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01456108596175909, + "timestamp": "2025-10-01 03:23:42.281519", + "step": 2153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.321038", + "step": 2153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04388154298067093, + "timestamp": "2025-10-01 03:23:42.327689", + "step": 2154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.369565", + "step": 2154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07709737122058868, + "timestamp": "2025-10-01 03:23:42.383165", + "step": 2155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.424739", + "step": 2155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049588125199079514, + "timestamp": "2025-10-01 03:23:42.456733", + "step": 2156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.498941", + "step": 2156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03489984571933746, + "timestamp": "2025-10-01 03:23:42.509331", + "step": 2157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.554188", + "step": 2157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020812857896089554, + "timestamp": "2025-10-01 03:23:42.560903", + "step": 2158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.603267", + "step": 2158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03611064702272415, + "timestamp": "2025-10-01 03:23:42.614907", + "step": 2159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.664512", + "step": 2159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005894296336919069, + "timestamp": "2025-10-01 03:23:42.700340", + "step": 2160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.743149", + "step": 2160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02394118346273899, + "timestamp": "2025-10-01 03:23:42.761642", + "step": 2161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.806784", + "step": 2161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022362712770700455, + "timestamp": "2025-10-01 03:23:42.820967", + "step": 2162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.865527", + "step": 2162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012676007114350796, + "timestamp": "2025-10-01 03:23:42.878565", + "step": 2163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:42.933678", + "step": 2163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027236680034548044, + "timestamp": "2025-10-01 03:23:42.962956", + "step": 2164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.018938", + "step": 2164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02523030899465084, + "timestamp": "2025-10-01 03:23:43.026939", + "step": 2165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.071923", + "step": 2165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04959941655397415, + "timestamp": "2025-10-01 03:23:43.084615", + "step": 2166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.127051", + "step": 2166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032288335263729095, + "timestamp": "2025-10-01 03:23:43.137751", + "step": 2167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.180116", + "step": 2167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003926360979676247, + "timestamp": "2025-10-01 03:23:43.212933", + "step": 2168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.255167", + "step": 2168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05257205292582512, + "timestamp": "2025-10-01 03:23:43.266637", + "step": 2169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.308233", + "step": 2169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01879867911338806, + "timestamp": "2025-10-01 03:23:43.322649", + "step": 2170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.366012", + "step": 2170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008240017108619213, + "timestamp": "2025-10-01 03:23:43.379615", + "step": 2171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.423641", + "step": 2171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029985183849930763, + "timestamp": "2025-10-01 03:23:43.456457", + "step": 2172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.519263", + "step": 2172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010423814877867699, + "timestamp": "2025-10-01 03:23:43.529792", + "step": 2173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:43.574299", + "step": 2173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015611663460731506, + "timestamp": "2025-10-01 03:23:43.588312", + "step": 2174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.633854", + "step": 2174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010287121869623661, + "timestamp": "2025-10-01 03:23:43.648012", + "step": 2175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.690201", + "step": 2175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024450866505503654, + "timestamp": "2025-10-01 03:23:43.725635", + "step": 2176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:43.768917", + "step": 2176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024218138307332993, + "timestamp": "2025-10-01 03:23:43.781936", + "step": 2177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.826767", + "step": 2177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0145065663382411, + "timestamp": "2025-10-01 03:23:43.840362", + "step": 2178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.896997", + "step": 2178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0412977859377861, + "timestamp": "2025-10-01 03:23:43.912081", + "step": 2179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:43.966234", + "step": 2179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0073566315695643425, + "timestamp": "2025-10-01 03:23:43.998189", + "step": 2180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.054205", + "step": 2180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006439515855163336, + "timestamp": "2025-10-01 03:23:44.065685", + "step": 2181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.109672", + "step": 2181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.11167865246534348, + "timestamp": "2025-10-01 03:23:44.119693", + "step": 2182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:44.163182", + "step": 2182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038293104618787766, + "timestamp": "2025-10-01 03:23:44.174345", + "step": 2183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.223096", + "step": 2183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015311656519770622, + "timestamp": "2025-10-01 03:23:44.257766", + "step": 2184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.304452", + "step": 2184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0402367077767849, + "timestamp": "2025-10-01 03:23:44.311101", + "step": 2185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.353436", + "step": 2185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04408599063754082, + "timestamp": "2025-10-01 03:23:44.365030", + "step": 2186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:44.407920", + "step": 2186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01761074736714363, + "timestamp": "2025-10-01 03:23:44.422593", + "step": 2187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.470787", + "step": 2187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04406633973121643, + "timestamp": "2025-10-01 03:23:44.507779", + "step": 2188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.555299", + "step": 2188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02086411602795124, + "timestamp": "2025-10-01 03:23:44.569642", + "step": 2189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.616304", + "step": 2189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01035247277468443, + "timestamp": "2025-10-01 03:23:44.630449", + "step": 2190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.674295", + "step": 2190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006801045034080744, + "timestamp": "2025-10-01 03:23:44.682535", + "step": 2191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.725354", + "step": 2191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02777045965194702, + "timestamp": "2025-10-01 03:23:44.756381", + "step": 2192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.808675", + "step": 2192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013107122853398323, + "timestamp": "2025-10-01 03:23:44.818180", + "step": 2193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.860700", + "step": 2193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021783659234642982, + "timestamp": "2025-10-01 03:23:44.870275", + "step": 2194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:44.913912", + "step": 2194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0529879592359066, + "timestamp": "2025-10-01 03:23:44.925014", + "step": 2195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:44.975215", + "step": 2195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037179168313741684, + "timestamp": "2025-10-01 03:23:45.009259", + "step": 2196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:45.059457", + "step": 2196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02782646007835865, + "timestamp": "2025-10-01 03:23:45.072006", + "step": 2197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:45.116692", + "step": 2197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01697869412600994, + "timestamp": "2025-10-01 03:23:45.131639", + "step": 2198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.178108", + "step": 2198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011922498233616352, + "timestamp": "2025-10-01 03:23:45.184637", + "step": 2199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.230528", + "step": 2199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0600714348256588, + "timestamp": "2025-10-01 03:23:45.267741", + "step": 2200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.325061", + "step": 2200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049399178475141525, + "timestamp": "2025-10-01 03:23:45.340454", + "step": 2201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.400977", + "step": 2201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04884195700287819, + "timestamp": "2025-10-01 03:23:45.419164", + "step": 2202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.475483", + "step": 2202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036612194031476974, + "timestamp": "2025-10-01 03:23:45.486113", + "step": 2203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:45.539118", + "step": 2203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028156796470284462, + "timestamp": "2025-10-01 03:23:45.572091", + "step": 2204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.628374", + "step": 2204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05495106056332588, + "timestamp": "2025-10-01 03:23:45.640140", + "step": 2205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.691671", + "step": 2205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06741984188556671, + "timestamp": "2025-10-01 03:23:45.695558", + "step": 2206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.759384", + "step": 2206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007150748278945684, + "timestamp": "2025-10-01 03:23:45.773862", + "step": 2207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:45.821690", + "step": 2207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028009803965687752, + "timestamp": "2025-10-01 03:23:45.853279", + "step": 2208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:45.905343", + "step": 2208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06575233489274979, + "timestamp": "2025-10-01 03:23:45.916291", + "step": 2209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:45.982422", + "step": 2209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01759413257241249, + "timestamp": "2025-10-01 03:23:45.995051", + "step": 2210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.065228", + "step": 2210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014944241382181644, + "timestamp": "2025-10-01 03:23:46.084786", + "step": 2211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.142328", + "step": 2211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01962846890091896, + "timestamp": "2025-10-01 03:23:46.180237", + "step": 2212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.228100", + "step": 2212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04434167966246605, + "timestamp": "2025-10-01 03:23:46.244782", + "step": 2213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.291190", + "step": 2213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017101231962442398, + "timestamp": "2025-10-01 03:23:46.307065", + "step": 2214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.359618", + "step": 2214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013600540347397327, + "timestamp": "2025-10-01 03:23:46.377743", + "step": 2215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.434597", + "step": 2215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06541150063276291, + "timestamp": "2025-10-01 03:23:46.470456", + "step": 2216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:46.521870", + "step": 2216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01580369472503662, + "timestamp": "2025-10-01 03:23:46.526252", + "step": 2217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.576838", + "step": 2217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024046001955866814, + "timestamp": "2025-10-01 03:23:46.592055", + "step": 2218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:46.637667", + "step": 2218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020802615210413933, + "timestamp": "2025-10-01 03:23:46.653540", + "step": 2219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:46.697934", + "step": 2219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007248580921441317, + "timestamp": "2025-10-01 03:23:46.733179", + "step": 2220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.787587", + "step": 2220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026751412078738213, + "timestamp": "2025-10-01 03:23:46.802850", + "step": 2221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.872778", + "step": 2221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010365569964051247, + "timestamp": "2025-10-01 03:23:46.884807", + "step": 2222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.932041", + "step": 2222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013154041953384876, + "timestamp": "2025-10-01 03:23:46.945420", + "step": 2223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:46.995119", + "step": 2223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017690373584628105, + "timestamp": "2025-10-01 03:23:47.020802", + "step": 2224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.060684", + "step": 2224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023486467078328133, + "timestamp": "2025-10-01 03:23:47.074715", + "step": 2225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.124524", + "step": 2225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02901529148221016, + "timestamp": "2025-10-01 03:23:47.129160", + "step": 2226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.178091", + "step": 2226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03638491407036781, + "timestamp": "2025-10-01 03:23:47.194901", + "step": 2227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.240309", + "step": 2227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03016485460102558, + "timestamp": "2025-10-01 03:23:47.266611", + "step": 2228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.319505", + "step": 2228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026891475543379784, + "timestamp": "2025-10-01 03:23:47.332683", + "step": 2229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.378561", + "step": 2229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008674869313836098, + "timestamp": "2025-10-01 03:23:47.390993", + "step": 2230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.435372", + "step": 2230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0261529590934515, + "timestamp": "2025-10-01 03:23:47.451251", + "step": 2231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.498465", + "step": 2231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007591108791530132, + "timestamp": "2025-10-01 03:23:47.537320", + "step": 2232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.573840", + "step": 2232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047685153782367706, + "timestamp": "2025-10-01 03:23:47.588544", + "step": 2233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.636276", + "step": 2233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02140801027417183, + "timestamp": "2025-10-01 03:23:47.640065", + "step": 2234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.694623", + "step": 2234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026592925190925598, + "timestamp": "2025-10-01 03:23:47.709934", + "step": 2235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.775450", + "step": 2235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0374218225479126, + "timestamp": "2025-10-01 03:23:47.811711", + "step": 2236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.863921", + "step": 2236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03713621944189072, + "timestamp": "2025-10-01 03:23:47.879448", + "step": 2237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.931944", + "step": 2237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03340288996696472, + "timestamp": "2025-10-01 03:23:47.937089", + "step": 2238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:47.987650", + "step": 2238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04496076703071594, + "timestamp": "2025-10-01 03:23:48.002464", + "step": 2239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.047833", + "step": 2239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0074866642244160175, + "timestamp": "2025-10-01 03:23:48.084021", + "step": 2240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.130118", + "step": 2240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02910933457314968, + "timestamp": "2025-10-01 03:23:48.134431", + "step": 2241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.169153", + "step": 2241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026698706671595573, + "timestamp": "2025-10-01 03:23:48.185747", + "step": 2242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.248311", + "step": 2242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03336990252137184, + "timestamp": "2025-10-01 03:23:48.262731", + "step": 2243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.308616", + "step": 2243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03006967343389988, + "timestamp": "2025-10-01 03:23:48.335188", + "step": 2244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.372623", + "step": 2244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023829955607652664, + "timestamp": "2025-10-01 03:23:48.387949", + "step": 2245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.434885", + "step": 2245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05265750363469124, + "timestamp": "2025-10-01 03:23:48.452645", + "step": 2246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.499903", + "step": 2246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046338461339473724, + "timestamp": "2025-10-01 03:23:48.515009", + "step": 2247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.576284", + "step": 2247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01739192195236683, + "timestamp": "2025-10-01 03:23:48.614687", + "step": 2248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.663519", + "step": 2248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022692354395985603, + "timestamp": "2025-10-01 03:23:48.676337", + "step": 2249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:48.722950", + "step": 2249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0057902405969798565, + "timestamp": "2025-10-01 03:23:48.728211", + "step": 2250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.765771", + "step": 2250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02251155488193035, + "timestamp": "2025-10-01 03:23:48.781038", + "step": 2251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:48.845073", + "step": 2251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00823775865137577, + "timestamp": "2025-10-01 03:23:48.871858", + "step": 2252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.907797", + "step": 2252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05473462492227554, + "timestamp": "2025-10-01 03:23:48.923024", + "step": 2253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:48.987961", + "step": 2253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05480130761861801, + "timestamp": "2025-10-01 03:23:48.992379", + "step": 2254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.053786", + "step": 2254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008519284427165985, + "timestamp": "2025-10-01 03:23:49.058474", + "step": 2255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:49.108818", + "step": 2255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04895481839776039, + "timestamp": "2025-10-01 03:23:49.134172", + "step": 2256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:49.170378", + "step": 2256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0113839628174901, + "timestamp": "2025-10-01 03:23:49.186553", + "step": 2257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.224248", + "step": 2257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07009406387805939, + "timestamp": "2025-10-01 03:23:49.229373", + "step": 2258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:49.265277", + "step": 2258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02224046364426613, + "timestamp": "2025-10-01 03:23:49.271067", + "step": 2259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:49.315337", + "step": 2259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013498918153345585, + "timestamp": "2025-10-01 03:23:49.342309", + "step": 2260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.396907", + "step": 2260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03198279067873955, + "timestamp": "2025-10-01 03:23:49.402380", + "step": 2261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.452478", + "step": 2261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0388142354786396, + "timestamp": "2025-10-01 03:23:49.467051", + "step": 2262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.514802", + "step": 2262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02700703777372837, + "timestamp": "2025-10-01 03:23:49.530830", + "step": 2263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:49.581080", + "step": 2263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0380290225148201, + "timestamp": "2025-10-01 03:23:49.619909", + "step": 2264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:49.668554", + "step": 2264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027816731482744217, + "timestamp": "2025-10-01 03:23:49.687835", + "step": 2265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.738937", + "step": 2265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07015695422887802, + "timestamp": "2025-10-01 03:23:49.758092", + "step": 2266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:49.805686", + "step": 2266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0427885539829731, + "timestamp": "2025-10-01 03:23:49.810764", + "step": 2267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.846013", + "step": 2267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02124977484345436, + "timestamp": "2025-10-01 03:23:49.871586", + "step": 2268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.906901", + "step": 2268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03623292222619057, + "timestamp": "2025-10-01 03:23:49.922650", + "step": 2269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:49.969962", + "step": 2269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0568590946495533, + "timestamp": "2025-10-01 03:23:49.984749", + "step": 2270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:50.047114", + "step": 2270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014955349266529083, + "timestamp": "2025-10-01 03:23:50.063495", + "step": 2271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:50.112789", + "step": 2271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05286140367388725, + "timestamp": "2025-10-01 03:23:50.153689", + "step": 2272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:50.199691", + "step": 2272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03710796684026718, + "timestamp": "2025-10-01 03:23:50.215142", + "step": 2273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:50.276371", + "step": 2273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009788061492145061, + "timestamp": "2025-10-01 03:23:50.290773", + "step": 2274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:50.340772", + "step": 2274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022184163331985474, + "timestamp": "2025-10-01 03:23:50.357256", + "step": 2275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:50.404890", + "step": 2275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016798358410596848, + "timestamp": "2025-10-01 03:23:50.441871", + "step": 2276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:50.488271", + "step": 2276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035465095192193985, + "timestamp": "2025-10-01 03:23:50.501979", + "step": 2277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:50.552366", + "step": 2277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03589557483792305, + "timestamp": "2025-10-01 03:23:50.571226", + "step": 2278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:50.619893", + "step": 2278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008583617396652699, + "timestamp": "2025-10-01 03:23:50.638280", + "step": 2279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:50.689334", + "step": 2279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03109481744468212, + "timestamp": "2025-10-01 03:23:50.726490", + "step": 2280, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:23:53.806521", + "step": 2280, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2481443.9800659255, + "timestamp": "2025-10-01 03:23:53.820607", + "step": 2280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:53.862845", + "step": 2280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038463007658720016, + "timestamp": "2025-10-01 03:23:53.874769", + "step": 2281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:53.918989", + "step": 2281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02183273620903492, + "timestamp": "2025-10-01 03:23:53.932102", + "step": 2282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:53.977974", + "step": 2282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011775798164308071, + "timestamp": "2025-10-01 03:23:53.989681", + "step": 2283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.032511", + "step": 2283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043861616402864456, + "timestamp": "2025-10-01 03:23:54.065182", + "step": 2284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.108420", + "step": 2284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013492545112967491, + "timestamp": "2025-10-01 03:23:54.120708", + "step": 2285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.171227", + "step": 2285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020792553201317787, + "timestamp": "2025-10-01 03:23:54.185346", + "step": 2286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.229213", + "step": 2286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036734603345394135, + "timestamp": "2025-10-01 03:23:54.244129", + "step": 2287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.289586", + "step": 2287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019327251240611076, + "timestamp": "2025-10-01 03:23:54.322114", + "step": 2288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:54.366845", + "step": 2288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027719812467694283, + "timestamp": "2025-10-01 03:23:54.378671", + "step": 2289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:54.422986", + "step": 2289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0576687753200531, + "timestamp": "2025-10-01 03:23:54.435019", + "step": 2290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.475549", + "step": 2290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02797837369143963, + "timestamp": "2025-10-01 03:23:54.490111", + "step": 2291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:54.534163", + "step": 2291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019248131662607193, + "timestamp": "2025-10-01 03:23:54.566679", + "step": 2292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.603295", + "step": 2292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032735669519752264, + "timestamp": "2025-10-01 03:23:54.616016", + "step": 2293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.659570", + "step": 2293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03419351950287819, + "timestamp": "2025-10-01 03:23:54.670178", + "step": 2294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.725444", + "step": 2294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04187525436282158, + "timestamp": "2025-10-01 03:23:54.736592", + "step": 2295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.780936", + "step": 2295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02026108093559742, + "timestamp": "2025-10-01 03:23:54.819217", + "step": 2296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.866255", + "step": 2296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024492377415299416, + "timestamp": "2025-10-01 03:23:54.883900", + "step": 2297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:54.932021", + "step": 2297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020680207759141922, + "timestamp": "2025-10-01 03:23:54.948897", + "step": 2298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:55.005954", + "step": 2298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01732856221497059, + "timestamp": "2025-10-01 03:23:55.010496", + "step": 2299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.062751", + "step": 2299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022137995809316635, + "timestamp": "2025-10-01 03:23:55.102745", + "step": 2300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.161265", + "step": 2300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027662966400384903, + "timestamp": "2025-10-01 03:23:55.180975", + "step": 2301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.230195", + "step": 2301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01056684274226427, + "timestamp": "2025-10-01 03:23:55.247215", + "step": 2302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.296055", + "step": 2302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04284011572599411, + "timestamp": "2025-10-01 03:23:55.315726", + "step": 2303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.365595", + "step": 2303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04651902988553047, + "timestamp": "2025-10-01 03:23:55.406076", + "step": 2304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.452437", + "step": 2304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037790894508361816, + "timestamp": "2025-10-01 03:23:55.471985", + "step": 2305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.520324", + "step": 2305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009997544810175896, + "timestamp": "2025-10-01 03:23:55.535057", + "step": 2306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.580576", + "step": 2306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006217437330633402, + "timestamp": "2025-10-01 03:23:55.590667", + "step": 2307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.633055", + "step": 2307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017504749819636345, + "timestamp": "2025-10-01 03:23:55.667968", + "step": 2308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.716914", + "step": 2308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024138327687978745, + "timestamp": "2025-10-01 03:23:55.727442", + "step": 2309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.788710", + "step": 2309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03886853903532028, + "timestamp": "2025-10-01 03:23:55.800121", + "step": 2310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.844999", + "step": 2310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.044076498597860336, + "timestamp": "2025-10-01 03:23:55.857088", + "step": 2311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:55.899559", + "step": 2311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01941763050854206, + "timestamp": "2025-10-01 03:23:55.932954", + "step": 2312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:55.973301", + "step": 2312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03715384751558304, + "timestamp": "2025-10-01 03:23:55.982423", + "step": 2313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.022800", + "step": 2313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043237484991550446, + "timestamp": "2025-10-01 03:23:56.034906", + "step": 2314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.086731", + "step": 2314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006867398042231798, + "timestamp": "2025-10-01 03:23:56.094780", + "step": 2315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:56.137733", + "step": 2315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06428135186433792, + "timestamp": "2025-10-01 03:23:56.169187", + "step": 2316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.227457", + "step": 2316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022701023146510124, + "timestamp": "2025-10-01 03:23:56.231990", + "step": 2317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.267062", + "step": 2317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002240831730887294, + "timestamp": "2025-10-01 03:23:56.278078", + "step": 2318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:56.319915", + "step": 2318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031147535890340805, + "timestamp": "2025-10-01 03:23:56.323909", + "step": 2319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.364817", + "step": 2319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03476901724934578, + "timestamp": "2025-10-01 03:23:56.392301", + "step": 2320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.435401", + "step": 2320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006498697679489851, + "timestamp": "2025-10-01 03:23:56.447437", + "step": 2321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.489550", + "step": 2321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03084760345518589, + "timestamp": "2025-10-01 03:23:56.503195", + "step": 2322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.547126", + "step": 2322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01123755518347025, + "timestamp": "2025-10-01 03:23:56.559053", + "step": 2323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.601336", + "step": 2323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04128888621926308, + "timestamp": "2025-10-01 03:23:56.634093", + "step": 2324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.676519", + "step": 2324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008725638501346111, + "timestamp": "2025-10-01 03:23:56.688396", + "step": 2325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.730641", + "step": 2325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05649871006608009, + "timestamp": "2025-10-01 03:23:56.741167", + "step": 2326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.785954", + "step": 2326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021953774616122246, + "timestamp": "2025-10-01 03:23:56.797882", + "step": 2327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.840800", + "step": 2327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011180246248841286, + "timestamp": "2025-10-01 03:23:56.872622", + "step": 2328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.917581", + "step": 2328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002484246389940381, + "timestamp": "2025-10-01 03:23:56.922477", + "step": 2329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.957965", + "step": 2329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014979173429310322, + "timestamp": "2025-10-01 03:23:56.961877", + "step": 2330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:56.996979", + "step": 2330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04805707931518555, + "timestamp": "2025-10-01 03:23:57.006768", + "step": 2331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:57.057421", + "step": 2331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031156908720731735, + "timestamp": "2025-10-01 03:23:57.088953", + "step": 2332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.129410", + "step": 2332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008230938576161861, + "timestamp": "2025-10-01 03:23:57.139069", + "step": 2333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.176176", + "step": 2333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0463353767991066, + "timestamp": "2025-10-01 03:23:57.184543", + "step": 2334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:57.225940", + "step": 2334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017958905547857285, + "timestamp": "2025-10-01 03:23:57.235139", + "step": 2335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.274967", + "step": 2335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07003790885210037, + "timestamp": "2025-10-01 03:23:57.305438", + "step": 2336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.349477", + "step": 2336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03322345018386841, + "timestamp": "2025-10-01 03:23:57.359714", + "step": 2337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.400316", + "step": 2337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02961520291864872, + "timestamp": "2025-10-01 03:23:57.411542", + "step": 2338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.447476", + "step": 2338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019384756684303284, + "timestamp": "2025-10-01 03:23:57.458038", + "step": 2339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.497927", + "step": 2339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019340967759490013, + "timestamp": "2025-10-01 03:23:57.528092", + "step": 2340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.579981", + "step": 2340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014016400091350079, + "timestamp": "2025-10-01 03:23:57.589240", + "step": 2341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.636184", + "step": 2341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09779427200555801, + "timestamp": "2025-10-01 03:23:57.644470", + "step": 2342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.683815", + "step": 2342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0603022538125515, + "timestamp": "2025-10-01 03:23:57.694135", + "step": 2343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:57.737037", + "step": 2343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01612371765077114, + "timestamp": "2025-10-01 03:23:57.767897", + "step": 2344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.810222", + "step": 2344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03449269011616707, + "timestamp": "2025-10-01 03:23:57.822887", + "step": 2345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:57.866784", + "step": 2345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017996491864323616, + "timestamp": "2025-10-01 03:23:57.880989", + "step": 2346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:57.922899", + "step": 2346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02048783004283905, + "timestamp": "2025-10-01 03:23:57.935855", + "step": 2347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:57.978848", + "step": 2347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05145378038287163, + "timestamp": "2025-10-01 03:23:58.015051", + "step": 2348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:58.057355", + "step": 2348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04953596368432045, + "timestamp": "2025-10-01 03:23:58.071453", + "step": 2349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.123309", + "step": 2349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04622546583414078, + "timestamp": "2025-10-01 03:23:58.135269", + "step": 2350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.179950", + "step": 2350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007204010151326656, + "timestamp": "2025-10-01 03:23:58.192026", + "step": 2351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.235701", + "step": 2351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006826937198638916, + "timestamp": "2025-10-01 03:23:58.265272", + "step": 2352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.321215", + "step": 2352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001385964103974402, + "timestamp": "2025-10-01 03:23:58.327909", + "step": 2353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:23:58.365655", + "step": 2353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06780360639095306, + "timestamp": "2025-10-01 03:23:58.373107", + "step": 2354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.422329", + "step": 2354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009748198091983795, + "timestamp": "2025-10-01 03:23:58.431328", + "step": 2355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.475561", + "step": 2355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08243217319250107, + "timestamp": "2025-10-01 03:23:58.518262", + "step": 2356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:58.569759", + "step": 2356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013837648555636406, + "timestamp": "2025-10-01 03:23:58.587441", + "step": 2357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:58.654070", + "step": 2357, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011764074675738811, + "timestamp": "2025-10-01 03:23:58.665715", + "step": 2358, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.708795", + "step": 2358, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03141136094927788, + "timestamp": "2025-10-01 03:23:58.721980", + "step": 2359, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.763200", + "step": 2359, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04584655910730362, + "timestamp": "2025-10-01 03:23:58.796056", + "step": 2360, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.840075", + "step": 2360, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.08940447121858597, + "timestamp": "2025-10-01 03:23:58.853757", + "step": 2361, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.899432", + "step": 2361, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03480146452784538, + "timestamp": "2025-10-01 03:23:58.912152", + "step": 2362, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:58.956451", + "step": 2362, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03742852807044983, + "timestamp": "2025-10-01 03:23:58.967245", + "step": 2363, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.011530", + "step": 2363, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029510939493775368, + "timestamp": "2025-10-01 03:23:59.045270", + "step": 2364, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.086636", + "step": 2364, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030361425131559372, + "timestamp": "2025-10-01 03:23:59.098765", + "step": 2365, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.152193", + "step": 2365, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029167259112000465, + "timestamp": "2025-10-01 03:23:59.156794", + "step": 2366, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.194403", + "step": 2366, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021530520170927048, + "timestamp": "2025-10-01 03:23:59.207093", + "step": 2367, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.250645", + "step": 2367, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017203358933329582, + "timestamp": "2025-10-01 03:23:59.285862", + "step": 2368, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.328595", + "step": 2368, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055677883327007294, + "timestamp": "2025-10-01 03:23:59.341160", + "step": 2369, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.384982", + "step": 2369, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017856841906905174, + "timestamp": "2025-10-01 03:23:59.397182", + "step": 2370, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.440499", + "step": 2370, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04901932552456856, + "timestamp": "2025-10-01 03:23:59.452020", + "step": 2371, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.495996", + "step": 2371, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011145291849970818, + "timestamp": "2025-10-01 03:23:59.530130", + "step": 2372, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.578420", + "step": 2372, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0154013242572546, + "timestamp": "2025-10-01 03:23:59.591795", + "step": 2373, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:23:59.636830", + "step": 2373, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05295354127883911, + "timestamp": "2025-10-01 03:23:59.651365", + "step": 2374, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.699346", + "step": 2374, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009273836389183998, + "timestamp": "2025-10-01 03:23:59.709984", + "step": 2375, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.754208", + "step": 2375, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026420939713716507, + "timestamp": "2025-10-01 03:23:59.789340", + "step": 2376, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:23:59.834671", + "step": 2376, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03559308871626854, + "timestamp": "2025-10-01 03:23:59.845583", + "step": 2377, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.889342", + "step": 2377, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03479627147316933, + "timestamp": "2025-10-01 03:23:59.899686", + "step": 2378, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.940806", + "step": 2378, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022992407903075218, + "timestamp": "2025-10-01 03:23:59.952535", + "step": 2379, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:23:59.999301", + "step": 2379, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02026294358074665, + "timestamp": "2025-10-01 03:24:00.033371", + "step": 2380, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.078400", + "step": 2380, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04557957872748375, + "timestamp": "2025-10-01 03:24:00.088506", + "step": 2381, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.130874", + "step": 2381, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04836162552237511, + "timestamp": "2025-10-01 03:24:00.143342", + "step": 2382, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:00.185294", + "step": 2382, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020681530237197876, + "timestamp": "2025-10-01 03:24:00.196517", + "step": 2383, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.241687", + "step": 2383, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03511347249150276, + "timestamp": "2025-10-01 03:24:00.275447", + "step": 2384, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.315153", + "step": 2384, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03787479177117348, + "timestamp": "2025-10-01 03:24:00.328387", + "step": 2385, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.375283", + "step": 2385, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002906391629949212, + "timestamp": "2025-10-01 03:24:00.386561", + "step": 2386, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:00.426084", + "step": 2386, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02081494964659214, + "timestamp": "2025-10-01 03:24:00.429505", + "step": 2387, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.472381", + "step": 2387, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04072757437825203, + "timestamp": "2025-10-01 03:24:00.506659", + "step": 2388, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.540899", + "step": 2388, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016087746247649193, + "timestamp": "2025-10-01 03:24:00.552534", + "step": 2389, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.591747", + "step": 2389, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00980063434690237, + "timestamp": "2025-10-01 03:24:00.595533", + "step": 2390, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:00.639482", + "step": 2390, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05500326305627823, + "timestamp": "2025-10-01 03:24:00.651559", + "step": 2391, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.694814", + "step": 2391, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03468436375260353, + "timestamp": "2025-10-01 03:24:00.729437", + "step": 2392, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.776542", + "step": 2392, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016224954277276993, + "timestamp": "2025-10-01 03:24:00.788083", + "step": 2393, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.828608", + "step": 2393, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0649244412779808, + "timestamp": "2025-10-01 03:24:00.845674", + "step": 2394, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.886422", + "step": 2394, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01711784489452839, + "timestamp": "2025-10-01 03:24:00.896882", + "step": 2395, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:00.936888", + "step": 2395, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0623721182346344, + "timestamp": "2025-10-01 03:24:00.962184", + "step": 2396, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:00.995109", + "step": 2396, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029404858127236366, + "timestamp": "2025-10-01 03:24:01.004520", + "step": 2397, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.046250", + "step": 2397, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04055517539381981, + "timestamp": "2025-10-01 03:24:01.056093", + "step": 2398, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.097254", + "step": 2398, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029140282422304153, + "timestamp": "2025-10-01 03:24:01.109911", + "step": 2399, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:01.152372", + "step": 2399, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028843680396676064, + "timestamp": "2025-10-01 03:24:01.184882", + "step": 2400, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.234191", + "step": 2400, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018885057419538498, + "timestamp": "2025-10-01 03:24:01.243016", + "step": 2401, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.293747", + "step": 2401, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012015653774142265, + "timestamp": "2025-10-01 03:24:01.305269", + "step": 2402, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.339300", + "step": 2402, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021713754162192345, + "timestamp": "2025-10-01 03:24:01.348447", + "step": 2403, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.400703", + "step": 2403, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030486036092042923, + "timestamp": "2025-10-01 03:24:01.432149", + "step": 2404, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:01.470916", + "step": 2404, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025044456124305725, + "timestamp": "2025-10-01 03:24:01.478874", + "step": 2405, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.525470", + "step": 2405, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01852262392640114, + "timestamp": "2025-10-01 03:24:01.537923", + "step": 2406, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.580145", + "step": 2406, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016385242342948914, + "timestamp": "2025-10-01 03:24:01.591647", + "step": 2407, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.633350", + "step": 2407, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0369933657348156, + "timestamp": "2025-10-01 03:24:01.663288", + "step": 2408, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.703232", + "step": 2408, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03189536929130554, + "timestamp": "2025-10-01 03:24:01.720479", + "step": 2409, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.761671", + "step": 2409, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05188439041376114, + "timestamp": "2025-10-01 03:24:01.771019", + "step": 2410, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.824438", + "step": 2410, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026044318452477455, + "timestamp": "2025-10-01 03:24:01.840814", + "step": 2411, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.900340", + "step": 2411, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03707408532500267, + "timestamp": "2025-10-01 03:24:01.938270", + "step": 2412, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:01.992926", + "step": 2412, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01809481345117092, + "timestamp": "2025-10-01 03:24:02.011707", + "step": 2413, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.070571", + "step": 2413, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06997089087963104, + "timestamp": "2025-10-01 03:24:02.088950", + "step": 2414, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.134774", + "step": 2414, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005703681614249945, + "timestamp": "2025-10-01 03:24:02.146287", + "step": 2415, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.187646", + "step": 2415, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014016940258443356, + "timestamp": "2025-10-01 03:24:02.223845", + "step": 2416, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.274387", + "step": 2416, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004279736429452896, + "timestamp": "2025-10-01 03:24:02.284417", + "step": 2417, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.328807", + "step": 2417, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017396384850144386, + "timestamp": "2025-10-01 03:24:02.345408", + "step": 2418, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.395033", + "step": 2418, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036308445036411285, + "timestamp": "2025-10-01 03:24:02.414040", + "step": 2419, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.459836", + "step": 2419, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05502869561314583, + "timestamp": "2025-10-01 03:24:02.500300", + "step": 2420, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.550380", + "step": 2420, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02433156594634056, + "timestamp": "2025-10-01 03:24:02.567371", + "step": 2421, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.619095", + "step": 2421, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023666976019740105, + "timestamp": "2025-10-01 03:24:02.631976", + "step": 2422, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.678935", + "step": 2422, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022372055798768997, + "timestamp": "2025-10-01 03:24:02.694541", + "step": 2423, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.738024", + "step": 2423, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01210565771907568, + "timestamp": "2025-10-01 03:24:02.774619", + "step": 2424, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.821989", + "step": 2424, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045388348400592804, + "timestamp": "2025-10-01 03:24:02.837699", + "step": 2425, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.881502", + "step": 2425, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018044060096144676, + "timestamp": "2025-10-01 03:24:02.895175", + "step": 2426, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.939806", + "step": 2426, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02911566011607647, + "timestamp": "2025-10-01 03:24:02.944654", + "step": 2427, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:02.990058", + "step": 2427, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04483082517981529, + "timestamp": "2025-10-01 03:24:03.026425", + "step": 2428, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:03.067817", + "step": 2428, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039889637380838394, + "timestamp": "2025-10-01 03:24:03.079645", + "step": 2429, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:03.119515", + "step": 2429, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0450984425842762, + "timestamp": "2025-10-01 03:24:03.128542", + "step": 2430, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:03.175931", + "step": 2430, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.056027282029390335, + "timestamp": "2025-10-01 03:24:03.186308", + "step": 2431, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:03.225947", + "step": 2431, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03237316757440567, + "timestamp": "2025-10-01 03:24:03.260936", + "step": 2432, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:24:05.862366", + "step": 2432, + "epoch": 1 + }, + { + "type": "pplx", + "content": 2244015.90811908, + "timestamp": "2025-10-01 03:24:05.875304", + "step": 2432, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:05.916605", + "step": 2432, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018818458542227745, + "timestamp": "2025-10-01 03:24:05.930941", + "step": 2433, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:05.976923", + "step": 2433, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021452870219945908, + "timestamp": "2025-10-01 03:24:05.988984", + "step": 2434, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.038182", + "step": 2434, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019113507121801376, + "timestamp": "2025-10-01 03:24:06.052311", + "step": 2435, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 1, + 80 + ], + "flops": 593517404912 + }, + "timestamp": "2025-10-01 03:24:06.165269", + "step": 2435, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03614775463938713, + "timestamp": "2025-10-01 03:24:06.200839", + "step": 2436, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.248113", + "step": 2436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030113857239484787, + "timestamp": "2025-10-01 03:24:06.264405", + "step": 2437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.309865", + "step": 2437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026476899161934853, + "timestamp": "2025-10-01 03:24:06.326354", + "step": 2438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.373074", + "step": 2438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020940719172358513, + "timestamp": "2025-10-01 03:24:06.385864", + "step": 2439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.435512", + "step": 2439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05523425340652466, + "timestamp": "2025-10-01 03:24:06.471991", + "step": 2440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.519362", + "step": 2440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.033478062599897385, + "timestamp": "2025-10-01 03:24:06.533926", + "step": 2441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.584376", + "step": 2441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00428877305239439, + "timestamp": "2025-10-01 03:24:06.596254", + "step": 2442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.642609", + "step": 2442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02186090685427189, + "timestamp": "2025-10-01 03:24:06.656003", + "step": 2443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.700837", + "step": 2443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025631843134760857, + "timestamp": "2025-10-01 03:24:06.733856", + "step": 2444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:06.776834", + "step": 2444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029722079634666443, + "timestamp": "2025-10-01 03:24:06.788639", + "step": 2445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:06.832381", + "step": 2445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020261377096176147, + "timestamp": "2025-10-01 03:24:06.845855", + "step": 2446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.900112", + "step": 2446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03762875869870186, + "timestamp": "2025-10-01 03:24:06.913926", + "step": 2447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:06.967069", + "step": 2447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021125834435224533, + "timestamp": "2025-10-01 03:24:07.004312", + "step": 2448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:07.060860", + "step": 2448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040271129459142685, + "timestamp": "2025-10-01 03:24:07.072888", + "step": 2449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.119523", + "step": 2449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023183096200227737, + "timestamp": "2025-10-01 03:24:07.132786", + "step": 2450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.180155", + "step": 2450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016347484663128853, + "timestamp": "2025-10-01 03:24:07.194927", + "step": 2451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.241926", + "step": 2451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.046576593071222305, + "timestamp": "2025-10-01 03:24:07.280348", + "step": 2452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.328322", + "step": 2452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019411567598581314, + "timestamp": "2025-10-01 03:24:07.343110", + "step": 2453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:07.394476", + "step": 2453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008690332062542439, + "timestamp": "2025-10-01 03:24:07.409098", + "step": 2454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.458155", + "step": 2454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02599169686436653, + "timestamp": "2025-10-01 03:24:07.470115", + "step": 2455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.526668", + "step": 2455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022110888734459877, + "timestamp": "2025-10-01 03:24:07.561747", + "step": 2456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:07.612735", + "step": 2456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016485530883073807, + "timestamp": "2025-10-01 03:24:07.628371", + "step": 2457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.675225", + "step": 2457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018104566261172295, + "timestamp": "2025-10-01 03:24:07.690585", + "step": 2458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.737151", + "step": 2458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034444425255060196, + "timestamp": "2025-10-01 03:24:07.750789", + "step": 2459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.802681", + "step": 2459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017411189153790474, + "timestamp": "2025-10-01 03:24:07.838122", + "step": 2460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.886468", + "step": 2460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012420641258358955, + "timestamp": "2025-10-01 03:24:07.900884", + "step": 2461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:07.944697", + "step": 2461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018652508035302162, + "timestamp": "2025-10-01 03:24:07.958999", + "step": 2462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:08.002117", + "step": 2462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004275734536349773, + "timestamp": "2025-10-01 03:24:08.017222", + "step": 2463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.061710", + "step": 2463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019614147022366524, + "timestamp": "2025-10-01 03:24:08.096679", + "step": 2464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.143031", + "step": 2464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001118886866606772, + "timestamp": "2025-10-01 03:24:08.158537", + "step": 2465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.204672", + "step": 2465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010537629947066307, + "timestamp": "2025-10-01 03:24:08.218103", + "step": 2466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.265470", + "step": 2466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02596229687333107, + "timestamp": "2025-10-01 03:24:08.280501", + "step": 2467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.322798", + "step": 2467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018786951899528503, + "timestamp": "2025-10-01 03:24:08.355304", + "step": 2468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.412916", + "step": 2468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03330925479531288, + "timestamp": "2025-10-01 03:24:08.424973", + "step": 2469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:08.469780", + "step": 2469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03177835792303085, + "timestamp": "2025-10-01 03:24:08.483110", + "step": 2470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.538293", + "step": 2470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009283670224249363, + "timestamp": "2025-10-01 03:24:08.552698", + "step": 2471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.598395", + "step": 2471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009003625251352787, + "timestamp": "2025-10-01 03:24:08.632511", + "step": 2472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.675896", + "step": 2472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008906546048820019, + "timestamp": "2025-10-01 03:24:08.691630", + "step": 2473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.735742", + "step": 2473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006170553620904684, + "timestamp": "2025-10-01 03:24:08.750219", + "step": 2474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.799788", + "step": 2474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009324687533080578, + "timestamp": "2025-10-01 03:24:08.811757", + "step": 2475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.853564", + "step": 2475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013093089684844017, + "timestamp": "2025-10-01 03:24:08.888035", + "step": 2476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:08.950233", + "step": 2476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027693908661603928, + "timestamp": "2025-10-01 03:24:08.963049", + "step": 2477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:09.008892", + "step": 2477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031382893212139606, + "timestamp": "2025-10-01 03:24:09.023646", + "step": 2478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:09.076525", + "step": 2478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006052759941667318, + "timestamp": "2025-10-01 03:24:09.080444", + "step": 2479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:09.128200", + "step": 2479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04757783189415932, + "timestamp": "2025-10-01 03:24:09.163701", + "step": 2480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:09.213656", + "step": 2480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039035475347191095, + "timestamp": "2025-10-01 03:24:09.228129", + "step": 2481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:09.290605", + "step": 2481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016002152115106583, + "timestamp": "2025-10-01 03:24:09.304947", + "step": 2482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:09.348601", + "step": 2482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002930231625214219, + "timestamp": "2025-10-01 03:24:09.366074", + "step": 2483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:09.411844", + "step": 2483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03566841408610344, + "timestamp": "2025-10-01 03:24:09.447032", + "step": 2484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:09.489159", + "step": 2484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.055276792496442795, + "timestamp": "2025-10-01 03:24:09.501845", + "step": 2485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:09.560884", + "step": 2485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016298171132802963, + "timestamp": "2025-10-01 03:24:09.576796", + "step": 2486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:09.632650", + "step": 2486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04863464832305908, + "timestamp": "2025-10-01 03:24:09.645951", + "step": 2487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:24:09.693600", + "step": 2487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025372618809342384, + "timestamp": "2025-10-01 03:24:09.732222", + "step": 2488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:09.784877", + "step": 2488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.09012048691511154, + "timestamp": "2025-10-01 03:24:09.796426", + "step": 2489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:09.845062", + "step": 2489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005738694686442614, + "timestamp": "2025-10-01 03:24:09.849522", + "step": 2490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:09.902569", + "step": 2490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006995200179517269, + "timestamp": "2025-10-01 03:24:09.915515", + "step": 2491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:09.957998", + "step": 2491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03779411315917969, + "timestamp": "2025-10-01 03:24:09.994880", + "step": 2492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:10.035223", + "step": 2492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00997820496559143, + "timestamp": "2025-10-01 03:24:10.040862", + "step": 2493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:10.090289", + "step": 2493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03411475941538811, + "timestamp": "2025-10-01 03:24:10.105709", + "step": 2494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:10.151835", + "step": 2494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012045695912092924, + "timestamp": "2025-10-01 03:24:10.156194", + "step": 2495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:10.204773", + "step": 2495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06923691183328629, + "timestamp": "2025-10-01 03:24:10.240567", + "step": 2496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:10.295134", + "step": 2496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006898222025483847, + "timestamp": "2025-10-01 03:24:10.311844", + "step": 2497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:10.367400", + "step": 2497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017319267615675926, + "timestamp": "2025-10-01 03:24:10.380293", + "step": 2498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:10.426776", + "step": 2498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04529703035950661, + "timestamp": "2025-10-01 03:24:10.439946", + "step": 2499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:10.490178", + "step": 2499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029018504545092583, + "timestamp": "2025-10-01 03:24:10.527496", + "step": 2500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 2500", + "timestamp": "2025-10-01 03:24:15.633316", + "step": 2500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:15.668295", + "step": 2500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.046781182289123535, + "timestamp": "2025-10-01 03:24:15.679956", + "step": 2501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:15.722307", + "step": 2501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003846703330054879, + "timestamp": "2025-10-01 03:24:15.736445", + "step": 2502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:15.778459", + "step": 2502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00581169780343771, + "timestamp": "2025-10-01 03:24:15.790849", + "step": 2503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:15.834221", + "step": 2503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008143546059727669, + "timestamp": "2025-10-01 03:24:15.867751", + "step": 2504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:15.908904", + "step": 2504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06808535009622574, + "timestamp": "2025-10-01 03:24:15.920158", + "step": 2505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:15.959998", + "step": 2505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01710103638470173, + "timestamp": "2025-10-01 03:24:15.970994", + "step": 2506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.012320", + "step": 2506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0256290715187788, + "timestamp": "2025-10-01 03:24:16.026131", + "step": 2507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:16.076990", + "step": 2507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01527292001992464, + "timestamp": "2025-10-01 03:24:16.107735", + "step": 2508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.148305", + "step": 2508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025318268686532974, + "timestamp": "2025-10-01 03:24:16.158688", + "step": 2509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.201477", + "step": 2509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02404594048857689, + "timestamp": "2025-10-01 03:24:16.214289", + "step": 2510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:16.259134", + "step": 2510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06121750548481941, + "timestamp": "2025-10-01 03:24:16.267620", + "step": 2511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.304082", + "step": 2511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009938931092619896, + "timestamp": "2025-10-01 03:24:16.343377", + "step": 2512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.382156", + "step": 2512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032513879239559174, + "timestamp": "2025-10-01 03:24:16.385457", + "step": 2513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.425971", + "step": 2513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032544344663619995, + "timestamp": "2025-10-01 03:24:16.436114", + "step": 2514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:16.484887", + "step": 2514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03610702604055405, + "timestamp": "2025-10-01 03:24:16.495297", + "step": 2515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.550406", + "step": 2515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006857483182102442, + "timestamp": "2025-10-01 03:24:16.582220", + "step": 2516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:24:16.621884", + "step": 2516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00951945036649704, + "timestamp": "2025-10-01 03:24:16.632045", + "step": 2517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.675672", + "step": 2517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05344615504145622, + "timestamp": "2025-10-01 03:24:16.687345", + "step": 2518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.729517", + "step": 2518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018810367211699486, + "timestamp": "2025-10-01 03:24:16.738639", + "step": 2519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.778862", + "step": 2519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.044370826333761215, + "timestamp": "2025-10-01 03:24:16.811312", + "step": 2520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.851691", + "step": 2520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020190922543406487, + "timestamp": "2025-10-01 03:24:16.860888", + "step": 2521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.905505", + "step": 2521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031848978251218796, + "timestamp": "2025-10-01 03:24:16.919126", + "step": 2522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:16.967746", + "step": 2522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027364173904061317, + "timestamp": "2025-10-01 03:24:16.979141", + "step": 2523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.034004", + "step": 2523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020781293511390686, + "timestamp": "2025-10-01 03:24:17.060044", + "step": 2524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.109402", + "step": 2524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0071802204474806786, + "timestamp": "2025-10-01 03:24:17.121444", + "step": 2525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:17.166703", + "step": 2525, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02068217284977436, + "timestamp": "2025-10-01 03:24:17.177606", + "step": 2526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:17.223179", + "step": 2526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03143022581934929, + "timestamp": "2025-10-01 03:24:17.232589", + "step": 2527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.276176", + "step": 2527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018646085634827614, + "timestamp": "2025-10-01 03:24:17.307700", + "step": 2528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.353259", + "step": 2528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018459057435393333, + "timestamp": "2025-10-01 03:24:17.357560", + "step": 2529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.395104", + "step": 2529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017489291727542877, + "timestamp": "2025-10-01 03:24:17.405875", + "step": 2530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.444274", + "step": 2530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.046796765178442, + "timestamp": "2025-10-01 03:24:17.448358", + "step": 2531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.491592", + "step": 2531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016053492203354836, + "timestamp": "2025-10-01 03:24:17.526317", + "step": 2532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:17.565975", + "step": 2532, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0287923701107502, + "timestamp": "2025-10-01 03:24:17.576635", + "step": 2533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.619055", + "step": 2533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005379381123930216, + "timestamp": "2025-10-01 03:24:17.629530", + "step": 2534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.668326", + "step": 2534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022992989048361778, + "timestamp": "2025-10-01 03:24:17.680006", + "step": 2535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:17.719266", + "step": 2535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016292504966259003, + "timestamp": "2025-10-01 03:24:17.753254", + "step": 2536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.794163", + "step": 2536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02290448360145092, + "timestamp": "2025-10-01 03:24:17.805184", + "step": 2537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.848454", + "step": 2537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018538618460297585, + "timestamp": "2025-10-01 03:24:17.854715", + "step": 2538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:17.889953", + "step": 2538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0284429844468832, + "timestamp": "2025-10-01 03:24:17.896253", + "step": 2539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:17.938485", + "step": 2539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006699207238852978, + "timestamp": "2025-10-01 03:24:17.965775", + "step": 2540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:18.003341", + "step": 2540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008707406930625439, + "timestamp": "2025-10-01 03:24:18.009911", + "step": 2541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.049735", + "step": 2541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007552083116024733, + "timestamp": "2025-10-01 03:24:18.054983", + "step": 2542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.113779", + "step": 2542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.042177047580480576, + "timestamp": "2025-10-01 03:24:18.120966", + "step": 2543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.167281", + "step": 2543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021109510213136673, + "timestamp": "2025-10-01 03:24:18.195704", + "step": 2544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.239045", + "step": 2544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005530539900064468, + "timestamp": "2025-10-01 03:24:18.244376", + "step": 2545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.282869", + "step": 2545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021504158154129982, + "timestamp": "2025-10-01 03:24:18.288411", + "step": 2546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:18.331393", + "step": 2546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04950522631406784, + "timestamp": "2025-10-01 03:24:18.337998", + "step": 2547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.374536", + "step": 2547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016378749161958694, + "timestamp": "2025-10-01 03:24:18.398378", + "step": 2548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.432093", + "step": 2548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03408261016011238, + "timestamp": "2025-10-01 03:24:18.434258", + "step": 2549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:18.467162", + "step": 2549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007741670589894056, + "timestamp": "2025-10-01 03:24:18.469500", + "step": 2550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.505458", + "step": 2550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.051420293748378754, + "timestamp": "2025-10-01 03:24:18.507871", + "step": 2551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.545912", + "step": 2551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011264682747423649, + "timestamp": "2025-10-01 03:24:18.569810", + "step": 2552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:18.604611", + "step": 2552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.055213749408721924, + "timestamp": "2025-10-01 03:24:18.609790", + "step": 2553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.668065", + "step": 2553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005813300143927336, + "timestamp": "2025-10-01 03:24:18.670293", + "step": 2554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.723623", + "step": 2554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012541760690510273, + "timestamp": "2025-10-01 03:24:18.726892", + "step": 2555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:18.786001", + "step": 2555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007020608521997929, + "timestamp": "2025-10-01 03:24:18.809822", + "step": 2556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.844752", + "step": 2556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002884343732148409, + "timestamp": "2025-10-01 03:24:18.848093", + "step": 2557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.885656", + "step": 2557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00828306470066309, + "timestamp": "2025-10-01 03:24:18.887895", + "step": 2558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.922503", + "step": 2558, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037140484899282455, + "timestamp": "2025-10-01 03:24:18.924696", + "step": 2559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:18.961999", + "step": 2559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00787018146365881, + "timestamp": "2025-10-01 03:24:18.986849", + "step": 2560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.022995", + "step": 2560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06163448840379715, + "timestamp": "2025-10-01 03:24:19.025136", + "step": 2561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.063725", + "step": 2561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003485900117084384, + "timestamp": "2025-10-01 03:24:19.066251", + "step": 2562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.107160", + "step": 2562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035704489797353745, + "timestamp": "2025-10-01 03:24:19.110157", + "step": 2563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.163024", + "step": 2563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01557237934321165, + "timestamp": "2025-10-01 03:24:19.186852", + "step": 2564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.223437", + "step": 2564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006103037390857935, + "timestamp": "2025-10-01 03:24:19.225779", + "step": 2565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.261986", + "step": 2565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022134941536933184, + "timestamp": "2025-10-01 03:24:19.264500", + "step": 2566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:19.300321", + "step": 2566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008768970146775246, + "timestamp": "2025-10-01 03:24:19.303205", + "step": 2567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.340143", + "step": 2567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007073683198541403, + "timestamp": "2025-10-01 03:24:19.363970", + "step": 2568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.398382", + "step": 2568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006822611205279827, + "timestamp": "2025-10-01 03:24:19.400534", + "step": 2569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.440465", + "step": 2569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006527920486405492, + "timestamp": "2025-10-01 03:24:19.443452", + "step": 2570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.475603", + "step": 2570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020008070394396782, + "timestamp": "2025-10-01 03:24:19.478060", + "step": 2571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.513193", + "step": 2571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014462734572589397, + "timestamp": "2025-10-01 03:24:19.537071", + "step": 2572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:19.572673", + "step": 2572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021851330995559692, + "timestamp": "2025-10-01 03:24:19.575027", + "step": 2573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.610097", + "step": 2573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011355539783835411, + "timestamp": "2025-10-01 03:24:19.612259", + "step": 2574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:19.643956", + "step": 2574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010110740549862385, + "timestamp": "2025-10-01 03:24:19.646411", + "step": 2575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.684300", + "step": 2575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006273237522691488, + "timestamp": "2025-10-01 03:24:19.707955", + "step": 2576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.747198", + "step": 2576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031361435540020466, + "timestamp": "2025-10-01 03:24:19.749860", + "step": 2577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.781274", + "step": 2577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028757426887750626, + "timestamp": "2025-10-01 03:24:19.785372", + "step": 2578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.838789", + "step": 2578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014824408572167158, + "timestamp": "2025-10-01 03:24:19.841097", + "step": 2579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.872857", + "step": 2579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03534097969532013, + "timestamp": "2025-10-01 03:24:19.897568", + "step": 2580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.939952", + "step": 2580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010329095646739006, + "timestamp": "2025-10-01 03:24:19.942581", + "step": 2581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:19.976320", + "step": 2581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018755387514829636, + "timestamp": "2025-10-01 03:24:19.978461", + "step": 2582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:20.010908", + "step": 2582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013891889713704586, + "timestamp": "2025-10-01 03:24:20.013351", + "step": 2583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:20.067663", + "step": 2583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01229120884090662, + "timestamp": "2025-10-01 03:24:20.091574", + "step": 2584, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:24:22.685577", + "step": 2584, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2681881.4157422935, + "timestamp": "2025-10-01 03:24:22.688731", + "step": 2584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:22.720898", + "step": 2584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005433977581560612, + "timestamp": "2025-10-01 03:24:22.723638", + "step": 2585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:22.760438", + "step": 2585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006171396933495998, + "timestamp": "2025-10-01 03:24:22.763298", + "step": 2586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:22.795649", + "step": 2586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011194304563105106, + "timestamp": "2025-10-01 03:24:22.798579", + "step": 2587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:22.829576", + "step": 2587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006364346481859684, + "timestamp": "2025-10-01 03:24:22.854275", + "step": 2588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:22.887483", + "step": 2588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040438976138830185, + "timestamp": "2025-10-01 03:24:22.905015", + "step": 2589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:22.940259", + "step": 2589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030650096014142036, + "timestamp": "2025-10-01 03:24:22.949408", + "step": 2590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:22.991985", + "step": 2590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011646231636404991, + "timestamp": "2025-10-01 03:24:23.002760", + "step": 2591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.044428", + "step": 2591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03672755882143974, + "timestamp": "2025-10-01 03:24:23.079503", + "step": 2592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:23.126679", + "step": 2592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006598520092666149, + "timestamp": "2025-10-01 03:24:23.132162", + "step": 2593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:23.171902", + "step": 2593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011292286217212677, + "timestamp": "2025-10-01 03:24:23.174766", + "step": 2594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.208007", + "step": 2594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014372700825333595, + "timestamp": "2025-10-01 03:24:23.210453", + "step": 2595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.242486", + "step": 2595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020701630041003227, + "timestamp": "2025-10-01 03:24:23.268676", + "step": 2596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.300627", + "step": 2596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011333879083395004, + "timestamp": "2025-10-01 03:24:23.303546", + "step": 2597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.349848", + "step": 2597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010141921229660511, + "timestamp": "2025-10-01 03:24:23.353335", + "step": 2598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.391392", + "step": 2598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011766685172915459, + "timestamp": "2025-10-01 03:24:23.394229", + "step": 2599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.425405", + "step": 2599, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008573326282203197, + "timestamp": "2025-10-01 03:24:23.452272", + "step": 2600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:23.484239", + "step": 2600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007714405655860901, + "timestamp": "2025-10-01 03:24:23.488164", + "step": 2601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:23.519663", + "step": 2601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006207666359841824, + "timestamp": "2025-10-01 03:24:23.522424", + "step": 2602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.575062", + "step": 2602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01418394036591053, + "timestamp": "2025-10-01 03:24:23.586009", + "step": 2603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:23.624209", + "step": 2603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01655757613480091, + "timestamp": "2025-10-01 03:24:23.648495", + "step": 2604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:23.680590", + "step": 2604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04483189433813095, + "timestamp": "2025-10-01 03:24:23.683410", + "step": 2605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.714885", + "step": 2605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009337909519672394, + "timestamp": "2025-10-01 03:24:23.717756", + "step": 2606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.750608", + "step": 2606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02187291905283928, + "timestamp": "2025-10-01 03:24:23.752917", + "step": 2607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:23.792564", + "step": 2607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020253141410648823, + "timestamp": "2025-10-01 03:24:23.819338", + "step": 2608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.853778", + "step": 2608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030719181522727013, + "timestamp": "2025-10-01 03:24:23.859769", + "step": 2609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.897049", + "step": 2609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00130437349434942, + "timestamp": "2025-10-01 03:24:23.899250", + "step": 2610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.932621", + "step": 2610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013440550304949284, + "timestamp": "2025-10-01 03:24:23.934712", + "step": 2611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:23.966235", + "step": 2611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006855987594462931, + "timestamp": "2025-10-01 03:24:23.990827", + "step": 2612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.023635", + "step": 2612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010143989697098732, + "timestamp": "2025-10-01 03:24:24.025925", + "step": 2613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.070080", + "step": 2613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004951119190081954, + "timestamp": "2025-10-01 03:24:24.072570", + "step": 2614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.105139", + "step": 2614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044675651588477194, + "timestamp": "2025-10-01 03:24:24.107324", + "step": 2615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.138283", + "step": 2615, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006782598793506622, + "timestamp": "2025-10-01 03:24:24.162289", + "step": 2616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:24.194293", + "step": 2616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002935652853921056, + "timestamp": "2025-10-01 03:24:24.197149", + "step": 2617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.236978", + "step": 2617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016829734668135643, + "timestamp": "2025-10-01 03:24:24.239208", + "step": 2618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.271632", + "step": 2618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0075536007061600685, + "timestamp": "2025-10-01 03:24:24.274001", + "step": 2619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:24.306520", + "step": 2619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027668515220284462, + "timestamp": "2025-10-01 03:24:24.330444", + "step": 2620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.368880", + "step": 2620, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01325747650116682, + "timestamp": "2025-10-01 03:24:24.371043", + "step": 2621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.405075", + "step": 2621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026547093875706196, + "timestamp": "2025-10-01 03:24:24.407360", + "step": 2622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.438950", + "step": 2622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006246590637601912, + "timestamp": "2025-10-01 03:24:24.442024", + "step": 2623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.474620", + "step": 2623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04244222864508629, + "timestamp": "2025-10-01 03:24:24.498371", + "step": 2624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.537585", + "step": 2624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007979833520948887, + "timestamp": "2025-10-01 03:24:24.540117", + "step": 2625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.586983", + "step": 2625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0068341451697051525, + "timestamp": "2025-10-01 03:24:24.588881", + "step": 2626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.620394", + "step": 2626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040807418525218964, + "timestamp": "2025-10-01 03:24:24.622509", + "step": 2627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.657082", + "step": 2627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003408764023333788, + "timestamp": "2025-10-01 03:24:24.681323", + "step": 2628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.720647", + "step": 2628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0442579984664917, + "timestamp": "2025-10-01 03:24:24.723021", + "step": 2629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.765908", + "step": 2629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009210084564983845, + "timestamp": "2025-10-01 03:24:24.768326", + "step": 2630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.805262", + "step": 2630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029669592157006264, + "timestamp": "2025-10-01 03:24:24.807639", + "step": 2631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.840117", + "step": 2631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06396102905273438, + "timestamp": "2025-10-01 03:24:24.864041", + "step": 2632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:24.897988", + "step": 2632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004833502229303122, + "timestamp": "2025-10-01 03:24:24.900272", + "step": 2633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.931630", + "step": 2633, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030268710106611252, + "timestamp": "2025-10-01 03:24:24.933705", + "step": 2634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:24.966838", + "step": 2634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.058417923748493195, + "timestamp": "2025-10-01 03:24:24.969123", + "step": 2635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.006298", + "step": 2635, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05999006703495979, + "timestamp": "2025-10-01 03:24:25.030459", + "step": 2636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.070126", + "step": 2636, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008207100443542004, + "timestamp": "2025-10-01 03:24:25.073043", + "step": 2637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:25.103853", + "step": 2637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01827244833111763, + "timestamp": "2025-10-01 03:24:25.107297", + "step": 2638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.142838", + "step": 2638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029443515464663506, + "timestamp": "2025-10-01 03:24:25.145232", + "step": 2639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.184127", + "step": 2639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008744167163968086, + "timestamp": "2025-10-01 03:24:25.208060", + "step": 2640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.246681", + "step": 2640, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021380942780524492, + "timestamp": "2025-10-01 03:24:25.248909", + "step": 2641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.281059", + "step": 2641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012057334184646606, + "timestamp": "2025-10-01 03:24:25.283845", + "step": 2642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.325323", + "step": 2642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023736804723739624, + "timestamp": "2025-10-01 03:24:25.328099", + "step": 2643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.361007", + "step": 2643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007514139171689749, + "timestamp": "2025-10-01 03:24:25.385035", + "step": 2644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.424129", + "step": 2644, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022688114549964666, + "timestamp": "2025-10-01 03:24:25.426757", + "step": 2645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.464741", + "step": 2645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008547540754079819, + "timestamp": "2025-10-01 03:24:25.467009", + "step": 2646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.503016", + "step": 2646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05950305983424187, + "timestamp": "2025-10-01 03:24:25.505406", + "step": 2647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.537953", + "step": 2647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013914128765463829, + "timestamp": "2025-10-01 03:24:25.561969", + "step": 2648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:25.593096", + "step": 2648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011020137928426266, + "timestamp": "2025-10-01 03:24:25.595539", + "step": 2649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.627676", + "step": 2649, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08177720755338669, + "timestamp": "2025-10-01 03:24:25.629886", + "step": 2650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:25.661599", + "step": 2650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06128353625535965, + "timestamp": "2025-10-01 03:24:25.663873", + "step": 2651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.696822", + "step": 2651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005079279653728008, + "timestamp": "2025-10-01 03:24:25.720833", + "step": 2652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.755221", + "step": 2652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004888718482106924, + "timestamp": "2025-10-01 03:24:25.757469", + "step": 2653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.791224", + "step": 2653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01076643355190754, + "timestamp": "2025-10-01 03:24:25.793608", + "step": 2654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.825702", + "step": 2654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024128075689077377, + "timestamp": "2025-10-01 03:24:25.827919", + "step": 2655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.861976", + "step": 2655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08437247574329376, + "timestamp": "2025-10-01 03:24:25.885972", + "step": 2656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:25.918032", + "step": 2656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014017016626894474, + "timestamp": "2025-10-01 03:24:25.920319", + "step": 2657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.952466", + "step": 2657, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008377933874726295, + "timestamp": "2025-10-01 03:24:25.955642", + "step": 2658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:25.987993", + "step": 2658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01578461565077305, + "timestamp": "2025-10-01 03:24:25.990561", + "step": 2659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.023477", + "step": 2659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020914891734719276, + "timestamp": "2025-10-01 03:24:26.047649", + "step": 2660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:26.079699", + "step": 2660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08028998225927353, + "timestamp": "2025-10-01 03:24:26.082086", + "step": 2661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.114613", + "step": 2661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008626872673630714, + "timestamp": "2025-10-01 03:24:26.117085", + "step": 2662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:26.149678", + "step": 2662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05398330092430115, + "timestamp": "2025-10-01 03:24:26.152033", + "step": 2663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.186882", + "step": 2663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030082134529948235, + "timestamp": "2025-10-01 03:24:26.210949", + "step": 2664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:26.242798", + "step": 2664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011516360566020012, + "timestamp": "2025-10-01 03:24:26.244986", + "step": 2665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.275961", + "step": 2665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041629284620285034, + "timestamp": "2025-10-01 03:24:26.278273", + "step": 2666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.312668", + "step": 2666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002497976878657937, + "timestamp": "2025-10-01 03:24:26.315044", + "step": 2667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:26.345581", + "step": 2667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02258189208805561, + "timestamp": "2025-10-01 03:24:26.369972", + "step": 2668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.400578", + "step": 2668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027142247185111046, + "timestamp": "2025-10-01 03:24:26.402877", + "step": 2669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.433626", + "step": 2669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011986029334366322, + "timestamp": "2025-10-01 03:24:26.435746", + "step": 2670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:26.467233", + "step": 2670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021660828962922096, + "timestamp": "2025-10-01 03:24:26.469857", + "step": 2671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.500652", + "step": 2671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018301369855180383, + "timestamp": "2025-10-01 03:24:26.524376", + "step": 2672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:26.555892", + "step": 2672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00951095949858427, + "timestamp": "2025-10-01 03:24:26.558054", + "step": 2673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.589496", + "step": 2673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011731659062206745, + "timestamp": "2025-10-01 03:24:26.591406", + "step": 2674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.622754", + "step": 2674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.047205377370119095, + "timestamp": "2025-10-01 03:24:26.626104", + "step": 2675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.658272", + "step": 2675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004016220103949308, + "timestamp": "2025-10-01 03:24:26.682550", + "step": 2676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.715927", + "step": 2676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000838952255435288, + "timestamp": "2025-10-01 03:24:26.718531", + "step": 2677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.752520", + "step": 2677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031052980571985245, + "timestamp": "2025-10-01 03:24:26.755125", + "step": 2678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:26.789420", + "step": 2678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016921550035476685, + "timestamp": "2025-10-01 03:24:26.795166", + "step": 2679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.829375", + "step": 2679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01168261282145977, + "timestamp": "2025-10-01 03:24:26.854627", + "step": 2680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.890278", + "step": 2680, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03950012847781181, + "timestamp": "2025-10-01 03:24:26.892698", + "step": 2681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.924233", + "step": 2681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028938964009284973, + "timestamp": "2025-10-01 03:24:26.926416", + "step": 2682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:26.956476", + "step": 2682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022460857406258583, + "timestamp": "2025-10-01 03:24:26.959043", + "step": 2683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:26.989476", + "step": 2683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03783725947141647, + "timestamp": "2025-10-01 03:24:27.013602", + "step": 2684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.053087", + "step": 2684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028727972880005836, + "timestamp": "2025-10-01 03:24:27.055558", + "step": 2685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.088247", + "step": 2685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014340327121317387, + "timestamp": "2025-10-01 03:24:27.090544", + "step": 2686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:27.122783", + "step": 2686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02728041633963585, + "timestamp": "2025-10-01 03:24:27.125487", + "step": 2687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.168866", + "step": 2687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05393126979470253, + "timestamp": "2025-10-01 03:24:27.197011", + "step": 2688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:27.229872", + "step": 2688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06464891135692596, + "timestamp": "2025-10-01 03:24:27.232121", + "step": 2689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.273535", + "step": 2689, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005298136733472347, + "timestamp": "2025-10-01 03:24:27.276001", + "step": 2690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.307931", + "step": 2690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029242748394608498, + "timestamp": "2025-10-01 03:24:27.310409", + "step": 2691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.341722", + "step": 2691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022700799629092216, + "timestamp": "2025-10-01 03:24:27.365804", + "step": 2692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:27.400582", + "step": 2692, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01289203017950058, + "timestamp": "2025-10-01 03:24:27.403167", + "step": 2693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.438041", + "step": 2693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009887747466564178, + "timestamp": "2025-10-01 03:24:27.440374", + "step": 2694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.471387", + "step": 2694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01738429255783558, + "timestamp": "2025-10-01 03:24:27.473500", + "step": 2695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.503439", + "step": 2695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024069195613265038, + "timestamp": "2025-10-01 03:24:27.527200", + "step": 2696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.557683", + "step": 2696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.058030545711517334, + "timestamp": "2025-10-01 03:24:27.560111", + "step": 2697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.591102", + "step": 2697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01392600778490305, + "timestamp": "2025-10-01 03:24:27.593260", + "step": 2698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.624449", + "step": 2698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02428264729678631, + "timestamp": "2025-10-01 03:24:27.626914", + "step": 2699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.657410", + "step": 2699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021304620429873466, + "timestamp": "2025-10-01 03:24:27.681053", + "step": 2700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.716502", + "step": 2700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014512710273265839, + "timestamp": "2025-10-01 03:24:27.718637", + "step": 2701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:27.749341", + "step": 2701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004569448996335268, + "timestamp": "2025-10-01 03:24:27.751648", + "step": 2702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.782671", + "step": 2702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016184713691473007, + "timestamp": "2025-10-01 03:24:27.785255", + "step": 2703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.816530", + "step": 2703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003670655656605959, + "timestamp": "2025-10-01 03:24:27.840374", + "step": 2704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.873771", + "step": 2704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02076827920973301, + "timestamp": "2025-10-01 03:24:27.878466", + "step": 2705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.917903", + "step": 2705, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04109204187989235, + "timestamp": "2025-10-01 03:24:27.920244", + "step": 2706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.960303", + "step": 2706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011366406455636024, + "timestamp": "2025-10-01 03:24:27.962745", + "step": 2707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:27.995207", + "step": 2707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005127616226673126, + "timestamp": "2025-10-01 03:24:28.018887", + "step": 2708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:28.054018", + "step": 2708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005703740753233433, + "timestamp": "2025-10-01 03:24:28.056652", + "step": 2709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.101865", + "step": 2709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022486569359898567, + "timestamp": "2025-10-01 03:24:28.104332", + "step": 2710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.138886", + "step": 2710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04439505189657211, + "timestamp": "2025-10-01 03:24:28.140975", + "step": 2711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:28.171384", + "step": 2711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0044705611653625965, + "timestamp": "2025-10-01 03:24:28.194806", + "step": 2712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:28.228113", + "step": 2712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08289854973554611, + "timestamp": "2025-10-01 03:24:28.230933", + "step": 2713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.262928", + "step": 2713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02313847653567791, + "timestamp": "2025-10-01 03:24:28.265308", + "step": 2714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.296114", + "step": 2714, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0855262354016304, + "timestamp": "2025-10-01 03:24:28.298453", + "step": 2715, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:28.329174", + "step": 2715, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013367367908358574, + "timestamp": "2025-10-01 03:24:28.352898", + "step": 2716, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.383371", + "step": 2716, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033922861330211163, + "timestamp": "2025-10-01 03:24:28.385708", + "step": 2717, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.416115", + "step": 2717, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06589954346418381, + "timestamp": "2025-10-01 03:24:28.418366", + "step": 2718, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.449067", + "step": 2718, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02594013325870037, + "timestamp": "2025-10-01 03:24:28.451557", + "step": 2719, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.482832", + "step": 2719, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03970567137002945, + "timestamp": "2025-10-01 03:24:28.506867", + "step": 2720, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:28.539946", + "step": 2720, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030360182747244835, + "timestamp": "2025-10-01 03:24:28.542134", + "step": 2721, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.580931", + "step": 2721, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027327288407832384, + "timestamp": "2025-10-01 03:24:28.583032", + "step": 2722, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.617293", + "step": 2722, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001343867159448564, + "timestamp": "2025-10-01 03:24:28.619927", + "step": 2723, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.652321", + "step": 2723, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018844887614250183, + "timestamp": "2025-10-01 03:24:28.676529", + "step": 2724, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.708196", + "step": 2724, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023658042773604393, + "timestamp": "2025-10-01 03:24:28.710350", + "step": 2725, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.742710", + "step": 2725, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023689676076173782, + "timestamp": "2025-10-01 03:24:28.744745", + "step": 2726, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.775705", + "step": 2726, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011798449791967869, + "timestamp": "2025-10-01 03:24:28.777927", + "step": 2727, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.809602", + "step": 2727, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0043214596807956696, + "timestamp": "2025-10-01 03:24:28.835371", + "step": 2728, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.867376", + "step": 2728, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015816621482372284, + "timestamp": "2025-10-01 03:24:28.869595", + "step": 2729, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.900956", + "step": 2729, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04337251931428909, + "timestamp": "2025-10-01 03:24:28.903330", + "step": 2730, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:28.934928", + "step": 2730, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02490677498281002, + "timestamp": "2025-10-01 03:24:28.937600", + "step": 2731, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:28.971571", + "step": 2731, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021258732303977013, + "timestamp": "2025-10-01 03:24:28.995449", + "step": 2732, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:29.028120", + "step": 2732, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013900687918066978, + "timestamp": "2025-10-01 03:24:29.030672", + "step": 2733, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:29.063103", + "step": 2733, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012769629247486591, + "timestamp": "2025-10-01 03:24:29.065609", + "step": 2734, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:29.098527", + "step": 2734, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005780296865850687, + "timestamp": "2025-10-01 03:24:29.100877", + "step": 2735, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:29.132868", + "step": 2735, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006883249618113041, + "timestamp": "2025-10-01 03:24:29.156619", + "step": 2736, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:24:31.443611", + "step": 2736, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2606142.4715422043, + "timestamp": "2025-10-01 03:24:31.446535", + "step": 2736, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.477388", + "step": 2736, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003208174603059888, + "timestamp": "2025-10-01 03:24:31.480123", + "step": 2737, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.523989", + "step": 2737, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01346578449010849, + "timestamp": "2025-10-01 03:24:31.528100", + "step": 2738, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.563812", + "step": 2738, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02442919835448265, + "timestamp": "2025-10-01 03:24:31.566518", + "step": 2739, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.598425", + "step": 2739, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008351706899702549, + "timestamp": "2025-10-01 03:24:31.623035", + "step": 2740, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.655109", + "step": 2740, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04374644532799721, + "timestamp": "2025-10-01 03:24:31.657996", + "step": 2741, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.695103", + "step": 2741, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06427619606256485, + "timestamp": "2025-10-01 03:24:31.698290", + "step": 2742, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.729990", + "step": 2742, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0040261028334498405, + "timestamp": "2025-10-01 03:24:31.732451", + "step": 2743, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.763215", + "step": 2743, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006850972305983305, + "timestamp": "2025-10-01 03:24:31.787278", + "step": 2744, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:31.831738", + "step": 2744, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01802860200405121, + "timestamp": "2025-10-01 03:24:31.834254", + "step": 2745, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.864993", + "step": 2745, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03180265054106712, + "timestamp": "2025-10-01 03:24:31.867028", + "step": 2746, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:31.897550", + "step": 2746, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02863389626145363, + "timestamp": "2025-10-01 03:24:31.899764", + "step": 2747, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.930668", + "step": 2747, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005860605277121067, + "timestamp": "2025-10-01 03:24:31.954496", + "step": 2748, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:31.986113", + "step": 2748, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01748032495379448, + "timestamp": "2025-10-01 03:24:31.988499", + "step": 2749, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:32.023311", + "step": 2749, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004278966691344976, + "timestamp": "2025-10-01 03:24:32.025759", + "step": 2750, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.056353", + "step": 2750, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007565335836261511, + "timestamp": "2025-10-01 03:24:32.058758", + "step": 2751, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.090193", + "step": 2751, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01153175812214613, + "timestamp": "2025-10-01 03:24:32.114005", + "step": 2752, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:32.145832", + "step": 2752, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015198581852018833, + "timestamp": "2025-10-01 03:24:32.148427", + "step": 2753, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.179222", + "step": 2753, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03041188418865204, + "timestamp": "2025-10-01 03:24:32.181447", + "step": 2754, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.212254", + "step": 2754, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033942125737667084, + "timestamp": "2025-10-01 03:24:32.214391", + "step": 2755, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.245973", + "step": 2755, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005134577862918377, + "timestamp": "2025-10-01 03:24:32.269683", + "step": 2756, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.301273", + "step": 2756, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004763012286275625, + "timestamp": "2025-10-01 03:24:32.303644", + "step": 2757, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:32.334927", + "step": 2757, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010170862078666687, + "timestamp": "2025-10-01 03:24:32.337096", + "step": 2758, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:32.367304", + "step": 2758, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009652599692344666, + "timestamp": "2025-10-01 03:24:32.369555", + "step": 2759, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.400383", + "step": 2759, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010590357705950737, + "timestamp": "2025-10-01 03:24:32.424734", + "step": 2760, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:32.456265", + "step": 2760, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04106386378407478, + "timestamp": "2025-10-01 03:24:32.458950", + "step": 2761, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.490670", + "step": 2761, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.051176588982343674, + "timestamp": "2025-10-01 03:24:32.493851", + "step": 2762, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.526791", + "step": 2762, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009605606086552143, + "timestamp": "2025-10-01 03:24:32.529255", + "step": 2763, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.560039", + "step": 2763, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011899854056537151, + "timestamp": "2025-10-01 03:24:32.584216", + "step": 2764, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.614222", + "step": 2764, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06248905137181282, + "timestamp": "2025-10-01 03:24:32.616641", + "step": 2765, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.646750", + "step": 2765, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029454808682203293, + "timestamp": "2025-10-01 03:24:32.648954", + "step": 2766, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.680674", + "step": 2766, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024107948411256075, + "timestamp": "2025-10-01 03:24:32.683111", + "step": 2767, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:32.713961", + "step": 2767, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005615733563899994, + "timestamp": "2025-10-01 03:24:32.737700", + "step": 2768, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.769775", + "step": 2768, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011980015551671386, + "timestamp": "2025-10-01 03:24:32.771881", + "step": 2769, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.803698", + "step": 2769, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03797609359025955, + "timestamp": "2025-10-01 03:24:32.806507", + "step": 2770, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.836598", + "step": 2770, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03512263298034668, + "timestamp": "2025-10-01 03:24:32.838871", + "step": 2771, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.869975", + "step": 2771, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03625377640128136, + "timestamp": "2025-10-01 03:24:32.893853", + "step": 2772, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.932215", + "step": 2772, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05505125969648361, + "timestamp": "2025-10-01 03:24:32.934456", + "step": 2773, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.965351", + "step": 2773, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.044585298746824265, + "timestamp": "2025-10-01 03:24:32.967641", + "step": 2774, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:32.998088", + "step": 2774, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02881637215614319, + "timestamp": "2025-10-01 03:24:33.000266", + "step": 2775, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:33.030679", + "step": 2775, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018388073658570647, + "timestamp": "2025-10-01 03:24:33.054700", + "step": 2776, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.085090", + "step": 2776, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007844952051527798, + "timestamp": "2025-10-01 03:24:33.088318", + "step": 2777, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.119407", + "step": 2777, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005043074022978544, + "timestamp": "2025-10-01 03:24:33.121997", + "step": 2778, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.154238", + "step": 2778, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016931960359215736, + "timestamp": "2025-10-01 03:24:33.156706", + "step": 2779, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.198386", + "step": 2779, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016218816861510277, + "timestamp": "2025-10-01 03:24:33.222541", + "step": 2780, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:33.256124", + "step": 2780, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014403719687834382, + "timestamp": "2025-10-01 03:24:33.258247", + "step": 2781, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.290415", + "step": 2781, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009550698101520538, + "timestamp": "2025-10-01 03:24:33.292754", + "step": 2782, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.327234", + "step": 2782, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006318315863609314, + "timestamp": "2025-10-01 03:24:33.329655", + "step": 2783, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.361589", + "step": 2783, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016444766893982887, + "timestamp": "2025-10-01 03:24:33.385382", + "step": 2784, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.416990", + "step": 2784, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00390628632158041, + "timestamp": "2025-10-01 03:24:33.419205", + "step": 2785, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.449785", + "step": 2785, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02850634790956974, + "timestamp": "2025-10-01 03:24:33.452090", + "step": 2786, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.483810", + "step": 2786, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007279823999851942, + "timestamp": "2025-10-01 03:24:33.488050", + "step": 2787, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.519106", + "step": 2787, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014204640872776508, + "timestamp": "2025-10-01 03:24:33.543256", + "step": 2788, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.578485", + "step": 2788, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014972885139286518, + "timestamp": "2025-10-01 03:24:33.580760", + "step": 2789, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.610859", + "step": 2789, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007723831571638584, + "timestamp": "2025-10-01 03:24:33.613183", + "step": 2790, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:33.643848", + "step": 2790, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004114118870347738, + "timestamp": "2025-10-01 03:24:33.646102", + "step": 2791, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.676408", + "step": 2791, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.052376143634319305, + "timestamp": "2025-10-01 03:24:33.700104", + "step": 2792, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.734447", + "step": 2792, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014478874392807484, + "timestamp": "2025-10-01 03:24:33.736629", + "step": 2793, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.770157", + "step": 2793, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028368551284074783, + "timestamp": "2025-10-01 03:24:33.772437", + "step": 2794, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.802667", + "step": 2794, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006240946240723133, + "timestamp": "2025-10-01 03:24:33.804781", + "step": 2795, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.834982", + "step": 2795, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00420558312907815, + "timestamp": "2025-10-01 03:24:33.858734", + "step": 2796, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.889439", + "step": 2796, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008318249019794166, + "timestamp": "2025-10-01 03:24:33.893170", + "step": 2797, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.923667", + "step": 2797, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005273931659758091, + "timestamp": "2025-10-01 03:24:33.925889", + "step": 2798, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:33.956164", + "step": 2798, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011751907877624035, + "timestamp": "2025-10-01 03:24:33.958789", + "step": 2799, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:33.989974", + "step": 2799, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020359144546091557, + "timestamp": "2025-10-01 03:24:34.013777", + "step": 2800, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.045684", + "step": 2800, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028513476252555847, + "timestamp": "2025-10-01 03:24:34.048980", + "step": 2801, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.088259", + "step": 2801, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01195278950035572, + "timestamp": "2025-10-01 03:24:34.090463", + "step": 2802, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.121781", + "step": 2802, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004600042011588812, + "timestamp": "2025-10-01 03:24:34.123902", + "step": 2803, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.154289", + "step": 2803, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003123464062809944, + "timestamp": "2025-10-01 03:24:34.188592", + "step": 2804, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:34.218766", + "step": 2804, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010693421587347984, + "timestamp": "2025-10-01 03:24:34.221570", + "step": 2805, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.255667", + "step": 2805, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0337144173681736, + "timestamp": "2025-10-01 03:24:34.257917", + "step": 2806, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.288649", + "step": 2806, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03025749698281288, + "timestamp": "2025-10-01 03:24:34.290960", + "step": 2807, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.321365", + "step": 2807, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007211410440504551, + "timestamp": "2025-10-01 03:24:34.346283", + "step": 2808, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.377342", + "step": 2808, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013912279391661286, + "timestamp": "2025-10-01 03:24:34.379466", + "step": 2809, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.410025", + "step": 2809, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010679290629923344, + "timestamp": "2025-10-01 03:24:34.412169", + "step": 2810, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.442528", + "step": 2810, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032066688872873783, + "timestamp": "2025-10-01 03:24:34.444884", + "step": 2811, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.475174", + "step": 2811, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00490538077428937, + "timestamp": "2025-10-01 03:24:34.499027", + "step": 2812, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.529943", + "step": 2812, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005414978368207812, + "timestamp": "2025-10-01 03:24:34.532533", + "step": 2813, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.563784", + "step": 2813, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0067076124250888824, + "timestamp": "2025-10-01 03:24:34.565947", + "step": 2814, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.597163", + "step": 2814, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010998489335179329, + "timestamp": "2025-10-01 03:24:34.599496", + "step": 2815, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.630340", + "step": 2815, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020941266790032387, + "timestamp": "2025-10-01 03:24:34.654145", + "step": 2816, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.687103", + "step": 2816, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010395586723461747, + "timestamp": "2025-10-01 03:24:34.689343", + "step": 2817, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.722184", + "step": 2817, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022756259888410568, + "timestamp": "2025-10-01 03:24:34.724577", + "step": 2818, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.755379", + "step": 2818, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006008595693856478, + "timestamp": "2025-10-01 03:24:34.757823", + "step": 2819, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.790187", + "step": 2819, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026754813734441996, + "timestamp": "2025-10-01 03:24:34.814432", + "step": 2820, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:34.845585", + "step": 2820, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03289053589105606, + "timestamp": "2025-10-01 03:24:34.848013", + "step": 2821, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.879139", + "step": 2821, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012909863144159317, + "timestamp": "2025-10-01 03:24:34.881366", + "step": 2822, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:34.912471", + "step": 2822, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01707497425377369, + "timestamp": "2025-10-01 03:24:34.914666", + "step": 2823, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:34.945111", + "step": 2823, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0377124659717083, + "timestamp": "2025-10-01 03:24:34.969941", + "step": 2824, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:35.000628", + "step": 2824, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017130982130765915, + "timestamp": "2025-10-01 03:24:35.003100", + "step": 2825, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:35.038100", + "step": 2825, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05069473385810852, + "timestamp": "2025-10-01 03:24:35.040314", + "step": 2826, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.070674", + "step": 2826, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.09277808666229248, + "timestamp": "2025-10-01 03:24:35.073011", + "step": 2827, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.112327", + "step": 2827, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005114036612212658, + "timestamp": "2025-10-01 03:24:35.136067", + "step": 2828, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:35.166848", + "step": 2828, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018048852682113647, + "timestamp": "2025-10-01 03:24:35.169266", + "step": 2829, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:35.201060", + "step": 2829, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000981090241111815, + "timestamp": "2025-10-01 03:24:35.203982", + "step": 2830, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:35.234938", + "step": 2830, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02370641566812992, + "timestamp": "2025-10-01 03:24:35.238958", + "step": 2831, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.272148", + "step": 2831, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004801338363904506, + "timestamp": "2025-10-01 03:24:35.295973", + "step": 2832, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.328843", + "step": 2832, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04118427261710167, + "timestamp": "2025-10-01 03:24:35.331095", + "step": 2833, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.361504", + "step": 2833, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004330991301685572, + "timestamp": "2025-10-01 03:24:35.363774", + "step": 2834, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.394148", + "step": 2834, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0724225714802742, + "timestamp": "2025-10-01 03:24:35.396667", + "step": 2835, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.430661", + "step": 2835, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07949233800172806, + "timestamp": "2025-10-01 03:24:35.454318", + "step": 2836, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.486198", + "step": 2836, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004666280932724476, + "timestamp": "2025-10-01 03:24:35.488692", + "step": 2837, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.519594", + "step": 2837, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001579765579663217, + "timestamp": "2025-10-01 03:24:35.522261", + "step": 2838, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.553063", + "step": 2838, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018666401505470276, + "timestamp": "2025-10-01 03:24:35.555196", + "step": 2839, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.585542", + "step": 2839, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03181535378098488, + "timestamp": "2025-10-01 03:24:35.609332", + "step": 2840, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:24:35.640963", + "step": 2840, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030467117205262184, + "timestamp": "2025-10-01 03:24:35.643030", + "step": 2841, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.673860", + "step": 2841, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008755732560530305, + "timestamp": "2025-10-01 03:24:35.676203", + "step": 2842, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.707404", + "step": 2842, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010295799002051353, + "timestamp": "2025-10-01 03:24:35.710565", + "step": 2843, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.743229", + "step": 2843, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017700472846627235, + "timestamp": "2025-10-01 03:24:35.767152", + "step": 2844, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.800940", + "step": 2844, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020167885813862085, + "timestamp": "2025-10-01 03:24:35.803392", + "step": 2845, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.840243", + "step": 2845, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03271046280860901, + "timestamp": "2025-10-01 03:24:35.844063", + "step": 2846, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:35.874533", + "step": 2846, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037170324474573135, + "timestamp": "2025-10-01 03:24:35.876810", + "step": 2847, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.907609", + "step": 2847, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004880365449935198, + "timestamp": "2025-10-01 03:24:35.931408", + "step": 2848, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:35.962216", + "step": 2848, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013986615231260657, + "timestamp": "2025-10-01 03:24:35.964784", + "step": 2849, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:35.995510", + "step": 2849, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012827560305595398, + "timestamp": "2025-10-01 03:24:35.997776", + "step": 2850, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:36.028801", + "step": 2850, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016685588285326958, + "timestamp": "2025-10-01 03:24:36.031366", + "step": 2851, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.062585", + "step": 2851, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004869420547038317, + "timestamp": "2025-10-01 03:24:36.086388", + "step": 2852, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.116537", + "step": 2852, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022817237302660942, + "timestamp": "2025-10-01 03:24:36.118949", + "step": 2853, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.150082", + "step": 2853, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0161379836499691, + "timestamp": "2025-10-01 03:24:36.154000", + "step": 2854, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.187058", + "step": 2854, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004206997808068991, + "timestamp": "2025-10-01 03:24:36.189310", + "step": 2855, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.221898", + "step": 2855, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03606009855866432, + "timestamp": "2025-10-01 03:24:36.245989", + "step": 2856, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:36.294544", + "step": 2856, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017222873866558075, + "timestamp": "2025-10-01 03:24:36.296739", + "step": 2857, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:36.329499", + "step": 2857, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025777725502848625, + "timestamp": "2025-10-01 03:24:36.332357", + "step": 2858, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.363855", + "step": 2858, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024949783459305763, + "timestamp": "2025-10-01 03:24:36.366402", + "step": 2859, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.398157", + "step": 2859, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04462224245071411, + "timestamp": "2025-10-01 03:24:36.422080", + "step": 2860, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.453173", + "step": 2860, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006636906415224075, + "timestamp": "2025-10-01 03:24:36.455629", + "step": 2861, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.487688", + "step": 2861, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018027275800704956, + "timestamp": "2025-10-01 03:24:36.489989", + "step": 2862, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:36.521773", + "step": 2862, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011289690621197224, + "timestamp": "2025-10-01 03:24:36.524084", + "step": 2863, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.561338", + "step": 2863, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015443069860339165, + "timestamp": "2025-10-01 03:24:36.586597", + "step": 2864, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.619312", + "step": 2864, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017642194870859385, + "timestamp": "2025-10-01 03:24:36.621500", + "step": 2865, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.653055", + "step": 2865, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020965853706002235, + "timestamp": "2025-10-01 03:24:36.655899", + "step": 2866, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.687582", + "step": 2866, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017679851735010743, + "timestamp": "2025-10-01 03:24:36.690382", + "step": 2867, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.721652", + "step": 2867, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015998424496501684, + "timestamp": "2025-10-01 03:24:36.745401", + "step": 2868, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.775526", + "step": 2868, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06192175671458244, + "timestamp": "2025-10-01 03:24:36.777836", + "step": 2869, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.808498", + "step": 2869, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.044260360300540924, + "timestamp": "2025-10-01 03:24:36.810743", + "step": 2870, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:36.843567", + "step": 2870, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00788553524762392, + "timestamp": "2025-10-01 03:24:36.846016", + "step": 2871, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.876376", + "step": 2871, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036969296634197235, + "timestamp": "2025-10-01 03:24:36.900020", + "step": 2872, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.934821", + "step": 2872, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0182977132499218, + "timestamp": "2025-10-01 03:24:36.937378", + "step": 2873, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:36.970681", + "step": 2873, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031018976122140884, + "timestamp": "2025-10-01 03:24:36.972919", + "step": 2874, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.004825", + "step": 2874, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01970214955508709, + "timestamp": "2025-10-01 03:24:37.007353", + "step": 2875, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.039179", + "step": 2875, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024209399707615376, + "timestamp": "2025-10-01 03:24:37.063083", + "step": 2876, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.095829", + "step": 2876, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03199225664138794, + "timestamp": "2025-10-01 03:24:37.098257", + "step": 2877, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.132151", + "step": 2877, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00936694722622633, + "timestamp": "2025-10-01 03:24:37.134752", + "step": 2878, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.167005", + "step": 2878, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006227897945791483, + "timestamp": "2025-10-01 03:24:37.169406", + "step": 2879, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:37.201663", + "step": 2879, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02771577052772045, + "timestamp": "2025-10-01 03:24:37.225299", + "step": 2880, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.258537", + "step": 2880, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01626940257847309, + "timestamp": "2025-10-01 03:24:37.261745", + "step": 2881, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:37.298361", + "step": 2881, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04238307848572731, + "timestamp": "2025-10-01 03:24:37.300509", + "step": 2882, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.331603", + "step": 2882, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002271620323881507, + "timestamp": "2025-10-01 03:24:37.334708", + "step": 2883, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.371271", + "step": 2883, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.036302290856838226, + "timestamp": "2025-10-01 03:24:37.395020", + "step": 2884, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:37.433473", + "step": 2884, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005658892448991537, + "timestamp": "2025-10-01 03:24:37.436222", + "step": 2885, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.472866", + "step": 2885, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00607361039146781, + "timestamp": "2025-10-01 03:24:37.475176", + "step": 2886, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.508798", + "step": 2886, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003872368484735489, + "timestamp": "2025-10-01 03:24:37.511075", + "step": 2887, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:37.541506", + "step": 2887, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034229908138513565, + "timestamp": "2025-10-01 03:24:37.565471", + "step": 2888, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:24:39.809062", + "step": 2888, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2451716.6970244255, + "timestamp": "2025-10-01 03:24:39.811315", + "step": 2888, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:39.841540", + "step": 2888, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03590374439954758, + "timestamp": "2025-10-01 03:24:39.843876", + "step": 2889, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:39.878898", + "step": 2889, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01038043387234211, + "timestamp": "2025-10-01 03:24:39.881138", + "step": 2890, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:39.912611", + "step": 2890, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009571820497512817, + "timestamp": "2025-10-01 03:24:39.915291", + "step": 2891, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:39.947618", + "step": 2891, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.09302639961242676, + "timestamp": "2025-10-01 03:24:39.971683", + "step": 2892, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.003094", + "step": 2892, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005731052719056606, + "timestamp": "2025-10-01 03:24:40.005273", + "step": 2893, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.040773", + "step": 2893, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007477765437215567, + "timestamp": "2025-10-01 03:24:40.043991", + "step": 2894, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:40.075261", + "step": 2894, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013892519287765026, + "timestamp": "2025-10-01 03:24:40.077445", + "step": 2895, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.108524", + "step": 2895, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019141344353556633, + "timestamp": "2025-10-01 03:24:40.132390", + "step": 2896, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.168680", + "step": 2896, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013786937110126019, + "timestamp": "2025-10-01 03:24:40.171115", + "step": 2897, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.202812", + "step": 2897, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015081922523677349, + "timestamp": "2025-10-01 03:24:40.205250", + "step": 2898, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.236125", + "step": 2898, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0199117511510849, + "timestamp": "2025-10-01 03:24:40.238495", + "step": 2899, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.269845", + "step": 2899, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007549181114882231, + "timestamp": "2025-10-01 03:24:40.293643", + "step": 2900, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.324850", + "step": 2900, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031069418415427208, + "timestamp": "2025-10-01 03:24:40.327012", + "step": 2901, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.358803", + "step": 2901, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03852277249097824, + "timestamp": "2025-10-01 03:24:40.361814", + "step": 2902, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.395754", + "step": 2902, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029911410063505173, + "timestamp": "2025-10-01 03:24:40.397963", + "step": 2903, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:40.429230", + "step": 2903, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017330342903733253, + "timestamp": "2025-10-01 03:24:40.453163", + "step": 2904, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.484457", + "step": 2904, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003823334351181984, + "timestamp": "2025-10-01 03:24:40.486930", + "step": 2905, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.518093", + "step": 2905, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023877620697021484, + "timestamp": "2025-10-01 03:24:40.520361", + "step": 2906, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.552203", + "step": 2906, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023647157475352287, + "timestamp": "2025-10-01 03:24:40.554745", + "step": 2907, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.586214", + "step": 2907, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025263678282499313, + "timestamp": "2025-10-01 03:24:40.609978", + "step": 2908, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.643067", + "step": 2908, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01007988303899765, + "timestamp": "2025-10-01 03:24:40.645405", + "step": 2909, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.676611", + "step": 2909, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03279213234782219, + "timestamp": "2025-10-01 03:24:40.678853", + "step": 2910, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.710049", + "step": 2910, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008212363347411156, + "timestamp": "2025-10-01 03:24:40.712868", + "step": 2911, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:40.744549", + "step": 2911, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019713619723916054, + "timestamp": "2025-10-01 03:24:40.768703", + "step": 2912, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.806293", + "step": 2912, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03991119936108589, + "timestamp": "2025-10-01 03:24:40.808667", + "step": 2913, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:40.841322", + "step": 2913, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02184058167040348, + "timestamp": "2025-10-01 03:24:40.843631", + "step": 2914, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:40.874978", + "step": 2914, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011553935706615448, + "timestamp": "2025-10-01 03:24:40.877357", + "step": 2915, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:40.909842", + "step": 2915, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05757821351289749, + "timestamp": "2025-10-01 03:24:40.934364", + "step": 2916, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:40.965809", + "step": 2916, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016454637050628662, + "timestamp": "2025-10-01 03:24:40.968074", + "step": 2917, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.000756", + "step": 2917, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.048725757747888565, + "timestamp": "2025-10-01 03:24:41.002990", + "step": 2918, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.041721", + "step": 2918, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017710041254758835, + "timestamp": "2025-10-01 03:24:41.044034", + "step": 2919, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:41.081859", + "step": 2919, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009317291900515556, + "timestamp": "2025-10-01 03:24:41.105936", + "step": 2920, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:41.137555", + "step": 2920, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01461828500032425, + "timestamp": "2025-10-01 03:24:41.139749", + "step": 2921, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.170727", + "step": 2921, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0645054429769516, + "timestamp": "2025-10-01 03:24:41.179933", + "step": 2922, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.212757", + "step": 2922, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017023341730237007, + "timestamp": "2025-10-01 03:24:41.215290", + "step": 2923, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:41.246118", + "step": 2923, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011360278353095055, + "timestamp": "2025-10-01 03:24:41.269889", + "step": 2924, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.300408", + "step": 2924, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.054821137338876724, + "timestamp": "2025-10-01 03:24:41.302712", + "step": 2925, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.336545", + "step": 2925, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004073480609804392, + "timestamp": "2025-10-01 03:24:41.338833", + "step": 2926, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.369671", + "step": 2926, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018687637522816658, + "timestamp": "2025-10-01 03:24:41.373192", + "step": 2927, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:41.405067", + "step": 2927, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013631862588226795, + "timestamp": "2025-10-01 03:24:41.429224", + "step": 2928, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.460125", + "step": 2928, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05112210661172867, + "timestamp": "2025-10-01 03:24:41.462253", + "step": 2929, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.494241", + "step": 2929, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025748183950781822, + "timestamp": "2025-10-01 03:24:41.496738", + "step": 2930, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.529933", + "step": 2930, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016415691003203392, + "timestamp": "2025-10-01 03:24:41.532344", + "step": 2931, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.571486", + "step": 2931, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005125550087541342, + "timestamp": "2025-10-01 03:24:41.595461", + "step": 2932, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:41.626375", + "step": 2932, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009262623265385628, + "timestamp": "2025-10-01 03:24:41.628920", + "step": 2933, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.660246", + "step": 2933, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013265718705952168, + "timestamp": "2025-10-01 03:24:41.662517", + "step": 2934, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.693044", + "step": 2934, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007311088498681784, + "timestamp": "2025-10-01 03:24:41.695252", + "step": 2935, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.725441", + "step": 2935, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023647421039640903, + "timestamp": "2025-10-01 03:24:41.749326", + "step": 2936, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.780240", + "step": 2936, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018037750851362944, + "timestamp": "2025-10-01 03:24:41.782502", + "step": 2937, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.817607", + "step": 2937, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004414800554513931, + "timestamp": "2025-10-01 03:24:41.820908", + "step": 2938, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.851689", + "step": 2938, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03668063506484032, + "timestamp": "2025-10-01 03:24:41.853969", + "step": 2939, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:41.889093", + "step": 2939, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025063326582312584, + "timestamp": "2025-10-01 03:24:41.913509", + "step": 2940, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:41.945039", + "step": 2940, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01569272391498089, + "timestamp": "2025-10-01 03:24:41.948632", + "step": 2941, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:41.979512", + "step": 2941, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008971445262432098, + "timestamp": "2025-10-01 03:24:41.981958", + "step": 2942, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.013547", + "step": 2942, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017208818346261978, + "timestamp": "2025-10-01 03:24:42.015790", + "step": 2943, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.048230", + "step": 2943, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010504002682864666, + "timestamp": "2025-10-01 03:24:42.072063", + "step": 2944, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.103724", + "step": 2944, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015442895237356424, + "timestamp": "2025-10-01 03:24:42.106144", + "step": 2945, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:42.138445", + "step": 2945, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005341976415365934, + "timestamp": "2025-10-01 03:24:42.140690", + "step": 2946, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:42.171754", + "step": 2946, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023595809936523438, + "timestamp": "2025-10-01 03:24:42.175166", + "step": 2947, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:42.216285", + "step": 2947, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017097944393754005, + "timestamp": "2025-10-01 03:24:42.240089", + "step": 2948, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.271549", + "step": 2948, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008120584301650524, + "timestamp": "2025-10-01 03:24:42.274004", + "step": 2949, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:42.305722", + "step": 2949, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04525310918688774, + "timestamp": "2025-10-01 03:24:42.307941", + "step": 2950, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.338376", + "step": 2950, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012648001313209534, + "timestamp": "2025-10-01 03:24:42.340749", + "step": 2951, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.381976", + "step": 2951, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02580830827355385, + "timestamp": "2025-10-01 03:24:42.406000", + "step": 2952, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.437429", + "step": 2952, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005193217191845179, + "timestamp": "2025-10-01 03:24:42.439638", + "step": 2953, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.469978", + "step": 2953, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02275252155959606, + "timestamp": "2025-10-01 03:24:42.472417", + "step": 2954, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.504374", + "step": 2954, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003829141380265355, + "timestamp": "2025-10-01 03:24:42.506754", + "step": 2955, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.537972", + "step": 2955, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01025456003844738, + "timestamp": "2025-10-01 03:24:42.561761", + "step": 2956, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:42.593308", + "step": 2956, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004411888774484396, + "timestamp": "2025-10-01 03:24:42.595799", + "step": 2957, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.627173", + "step": 2957, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0051564620807766914, + "timestamp": "2025-10-01 03:24:42.630201", + "step": 2958, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.661636", + "step": 2958, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022529395297169685, + "timestamp": "2025-10-01 03:24:42.663846", + "step": 2959, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.694612", + "step": 2959, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006277484353631735, + "timestamp": "2025-10-01 03:24:42.718545", + "step": 2960, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.750517", + "step": 2960, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013796146027743816, + "timestamp": "2025-10-01 03:24:42.752858", + "step": 2961, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.784416", + "step": 2961, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010050333105027676, + "timestamp": "2025-10-01 03:24:42.786606", + "step": 2962, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.819380", + "step": 2962, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025642503052949905, + "timestamp": "2025-10-01 03:24:42.821758", + "step": 2963, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.853415", + "step": 2963, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03365166857838631, + "timestamp": "2025-10-01 03:24:42.877262", + "step": 2964, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:42.907998", + "step": 2964, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0440530851483345, + "timestamp": "2025-10-01 03:24:42.910274", + "step": 2965, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:42.941609", + "step": 2965, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02812628634274006, + "timestamp": "2025-10-01 03:24:42.943888", + "step": 2966, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:42.974676", + "step": 2966, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010481386445462704, + "timestamp": "2025-10-01 03:24:42.977059", + "step": 2967, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.008783", + "step": 2967, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03232744336128235, + "timestamp": "2025-10-01 03:24:43.032509", + "step": 2968, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:43.066941", + "step": 2968, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001979721477255225, + "timestamp": "2025-10-01 03:24:43.069456", + "step": 2969, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.102843", + "step": 2969, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003708248259499669, + "timestamp": "2025-10-01 03:24:43.105113", + "step": 2970, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.135529", + "step": 2970, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007259167265146971, + "timestamp": "2025-10-01 03:24:43.138853", + "step": 2971, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.174231", + "step": 2971, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009146636002697051, + "timestamp": "2025-10-01 03:24:43.198069", + "step": 2972, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.232169", + "step": 2972, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006259513320401311, + "timestamp": "2025-10-01 03:24:43.234521", + "step": 2973, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.274596", + "step": 2973, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011713932268321514, + "timestamp": "2025-10-01 03:24:43.276789", + "step": 2974, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.310285", + "step": 2974, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012795046204701066, + "timestamp": "2025-10-01 03:24:43.312943", + "step": 2975, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.346787", + "step": 2975, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025379187427461147, + "timestamp": "2025-10-01 03:24:43.370423", + "step": 2976, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.403657", + "step": 2976, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00626587588340044, + "timestamp": "2025-10-01 03:24:43.405913", + "step": 2977, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:43.438213", + "step": 2977, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05259128659963608, + "timestamp": "2025-10-01 03:24:43.441014", + "step": 2978, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.480942", + "step": 2978, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004325970076024532, + "timestamp": "2025-10-01 03:24:43.483288", + "step": 2979, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.516153", + "step": 2979, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007229256443679333, + "timestamp": "2025-10-01 03:24:43.539798", + "step": 2980, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.573093", + "step": 2980, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004733658861368895, + "timestamp": "2025-10-01 03:24:43.575390", + "step": 2981, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.607564", + "step": 2981, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005716077517718077, + "timestamp": "2025-10-01 03:24:43.609964", + "step": 2982, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.640494", + "step": 2982, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006280566565692425, + "timestamp": "2025-10-01 03:24:43.643581", + "step": 2983, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.687864", + "step": 2983, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006606461247429252, + "timestamp": "2025-10-01 03:24:43.711705", + "step": 2984, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:43.748587", + "step": 2984, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002426252467557788, + "timestamp": "2025-10-01 03:24:43.750475", + "step": 2985, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.783095", + "step": 2985, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018101593479514122, + "timestamp": "2025-10-01 03:24:43.785151", + "step": 2986, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.817020", + "step": 2986, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023617076221853495, + "timestamp": "2025-10-01 03:24:43.819073", + "step": 2987, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.851289", + "step": 2987, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009448095224797726, + "timestamp": "2025-10-01 03:24:43.874900", + "step": 2988, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.907453", + "step": 2988, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01988748274743557, + "timestamp": "2025-10-01 03:24:43.909434", + "step": 2989, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.941805", + "step": 2989, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007581166457384825, + "timestamp": "2025-10-01 03:24:43.944124", + "step": 2990, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:43.976513", + "step": 2990, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017863454297184944, + "timestamp": "2025-10-01 03:24:43.978647", + "step": 2991, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:44.010049", + "step": 2991, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021945340558886528, + "timestamp": "2025-10-01 03:24:44.033746", + "step": 2992, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:44.071012", + "step": 2992, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006313269841484725, + "timestamp": "2025-10-01 03:24:44.073496", + "step": 2993, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:44.113719", + "step": 2993, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020924236625432968, + "timestamp": "2025-10-01 03:24:44.115978", + "step": 2994, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:44.152148", + "step": 2994, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026175195816904306, + "timestamp": "2025-10-01 03:24:44.154638", + "step": 2995, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:44.205400", + "step": 2995, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002110003726556897, + "timestamp": "2025-10-01 03:24:44.229257", + "step": 2996, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:44.263425", + "step": 2996, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013667812570929527, + "timestamp": "2025-10-01 03:24:44.265528", + "step": 2997, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:44.296584", + "step": 2997, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03987133130431175, + "timestamp": "2025-10-01 03:24:44.298984", + "step": 2998, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:44.349751", + "step": 2998, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.042340654879808426, + "timestamp": "2025-10-01 03:24:44.353419", + "step": 2999, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:44.384832", + "step": 2999, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0264864694327116, + "timestamp": "2025-10-01 03:24:44.408688", + "step": 3000, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 3000", + "timestamp": "2025-10-01 03:24:49.300925", + "step": 3000, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:49.341677", + "step": 3000, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003313994500786066, + "timestamp": "2025-10-01 03:24:49.353748", + "step": 3001, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:49.397024", + "step": 3001, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025094235315918922, + "timestamp": "2025-10-01 03:24:49.401735", + "step": 3002, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.437405", + "step": 3002, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025419533252716064, + "timestamp": "2025-10-01 03:24:49.451173", + "step": 3003, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.484280", + "step": 3003, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011166109470650554, + "timestamp": "2025-10-01 03:24:49.522209", + "step": 3004, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:49.562362", + "step": 3004, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010485586244612932, + "timestamp": "2025-10-01 03:24:49.576025", + "step": 3005, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.619575", + "step": 3005, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.048523079603910446, + "timestamp": "2025-10-01 03:24:49.627936", + "step": 3006, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.666680", + "step": 3006, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015211907215416431, + "timestamp": "2025-10-01 03:24:49.675637", + "step": 3007, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.726495", + "step": 3007, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028032076079398394, + "timestamp": "2025-10-01 03:24:49.761501", + "step": 3008, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.793472", + "step": 3008, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04077979177236557, + "timestamp": "2025-10-01 03:24:49.797980", + "step": 3009, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.830824", + "step": 3009, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006372230127453804, + "timestamp": "2025-10-01 03:24:49.838990", + "step": 3010, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.879224", + "step": 3010, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006447223015129566, + "timestamp": "2025-10-01 03:24:49.886794", + "step": 3011, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:49.930462", + "step": 3011, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003405944677069783, + "timestamp": "2025-10-01 03:24:49.960328", + "step": 3012, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.003905", + "step": 3012, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012290330603718758, + "timestamp": "2025-10-01 03:24:50.012723", + "step": 3013, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.058529", + "step": 3013, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023689887020736933, + "timestamp": "2025-10-01 03:24:50.064806", + "step": 3014, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.108061", + "step": 3014, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005252111586742103, + "timestamp": "2025-10-01 03:24:50.119535", + "step": 3015, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.157709", + "step": 3015, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014746964909136295, + "timestamp": "2025-10-01 03:24:50.183319", + "step": 3016, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:50.222982", + "step": 3016, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00041070018778555095, + "timestamp": "2025-10-01 03:24:50.230549", + "step": 3017, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.284374", + "step": 3017, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028892431408166885, + "timestamp": "2025-10-01 03:24:50.293464", + "step": 3018, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.338438", + "step": 3018, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00301827909424901, + "timestamp": "2025-10-01 03:24:50.342640", + "step": 3019, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.387129", + "step": 3019, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00452068354934454, + "timestamp": "2025-10-01 03:24:50.418299", + "step": 3020, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:50.467579", + "step": 3020, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018950782250612974, + "timestamp": "2025-10-01 03:24:50.472250", + "step": 3021, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.505676", + "step": 3021, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013081954093649983, + "timestamp": "2025-10-01 03:24:50.518591", + "step": 3022, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.566761", + "step": 3022, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01916981302201748, + "timestamp": "2025-10-01 03:24:50.577733", + "step": 3023, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.619226", + "step": 3023, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0173745509237051, + "timestamp": "2025-10-01 03:24:50.645126", + "step": 3024, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:24:50.684798", + "step": 3024, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005159749649465084, + "timestamp": "2025-10-01 03:24:50.693861", + "step": 3025, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.733681", + "step": 3025, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002217615256085992, + "timestamp": "2025-10-01 03:24:50.739907", + "step": 3026, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.779100", + "step": 3026, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002934920135885477, + "timestamp": "2025-10-01 03:24:50.787702", + "step": 3027, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.827243", + "step": 3027, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02781626395881176, + "timestamp": "2025-10-01 03:24:50.857567", + "step": 3028, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.900826", + "step": 3028, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03313587233424187, + "timestamp": "2025-10-01 03:24:50.912145", + "step": 3029, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:50.990243", + "step": 3029, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001101549481973052, + "timestamp": "2025-10-01 03:24:50.999151", + "step": 3030, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:51.038786", + "step": 3030, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006045759655535221, + "timestamp": "2025-10-01 03:24:51.048137", + "step": 3031, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:51.087590", + "step": 3031, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000637427787296474, + "timestamp": "2025-10-01 03:24:51.112771", + "step": 3032, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:51.177609", + "step": 3032, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025964366272091866, + "timestamp": "2025-10-01 03:24:51.187696", + "step": 3033, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:51.228396", + "step": 3033, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021574883721768856, + "timestamp": "2025-10-01 03:24:51.232110", + "step": 3034, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:51.276592", + "step": 3034, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027834679349325597, + "timestamp": "2025-10-01 03:24:51.285551", + "step": 3035, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:51.324254", + "step": 3035, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.054799217730760574, + "timestamp": "2025-10-01 03:24:51.354569", + "step": 3036, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:51.394105", + "step": 3036, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.051234982907772064, + "timestamp": "2025-10-01 03:24:51.399002", + "step": 3037, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:51.439730", + "step": 3037, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013587677385658026, + "timestamp": "2025-10-01 03:24:51.442570", + "step": 3038, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:51.477636", + "step": 3038, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00037517346208915114, + "timestamp": "2025-10-01 03:24:51.483777", + "step": 3039, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:51.521346", + "step": 3039, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008545552380383015, + "timestamp": "2025-10-01 03:24:51.548052", + "step": 3040, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:24:54.807440", + "step": 3040, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2677563.476485418, + "timestamp": "2025-10-01 03:24:54.813461", + "step": 3040, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:54.847963", + "step": 3040, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01733328588306904, + "timestamp": "2025-10-01 03:24:54.860342", + "step": 3041, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:54.900382", + "step": 3041, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004478515591472387, + "timestamp": "2025-10-01 03:24:54.909847", + "step": 3042, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:54.956362", + "step": 3042, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006012989906594157, + "timestamp": "2025-10-01 03:24:54.963503", + "step": 3043, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.001844", + "step": 3043, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006853709346614778, + "timestamp": "2025-10-01 03:24:55.031847", + "step": 3044, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.073516", + "step": 3044, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.057607464492321014, + "timestamp": "2025-10-01 03:24:55.076373", + "step": 3045, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:55.127981", + "step": 3045, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006905236281454563, + "timestamp": "2025-10-01 03:24:55.134256", + "step": 3046, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.183437", + "step": 3046, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005544011015444994, + "timestamp": "2025-10-01 03:24:55.186007", + "step": 3047, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.220031", + "step": 3047, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008422502432949841, + "timestamp": "2025-10-01 03:24:55.243952", + "step": 3048, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.278598", + "step": 3048, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020064128562808037, + "timestamp": "2025-10-01 03:24:55.283492", + "step": 3049, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.326457", + "step": 3049, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007223148364573717, + "timestamp": "2025-10-01 03:24:55.341550", + "step": 3050, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.397464", + "step": 3050, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004763361532241106, + "timestamp": "2025-10-01 03:24:55.414416", + "step": 3051, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.463610", + "step": 3051, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005233142292127013, + "timestamp": "2025-10-01 03:24:55.491343", + "step": 3052, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.541885", + "step": 3052, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.048399485647678375, + "timestamp": "2025-10-01 03:24:55.560620", + "step": 3053, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.608939", + "step": 3053, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002331917406991124, + "timestamp": "2025-10-01 03:24:55.625814", + "step": 3054, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.680993", + "step": 3054, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004276335705071688, + "timestamp": "2025-10-01 03:24:55.699689", + "step": 3055, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.747784", + "step": 3055, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002513325016479939, + "timestamp": "2025-10-01 03:24:55.787696", + "step": 3056, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:55.834264", + "step": 3056, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001599853509105742, + "timestamp": "2025-10-01 03:24:55.837751", + "step": 3057, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:55.875800", + "step": 3057, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011215746402740479, + "timestamp": "2025-10-01 03:24:55.880747", + "step": 3058, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:55.941948", + "step": 3058, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006993879098445177, + "timestamp": "2025-10-01 03:24:55.949447", + "step": 3059, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:55.990061", + "step": 3059, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04430673271417618, + "timestamp": "2025-10-01 03:24:56.020846", + "step": 3060, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.069967", + "step": 3060, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009950843639671803, + "timestamp": "2025-10-01 03:24:56.072790", + "step": 3061, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.125617", + "step": 3061, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013285098597407341, + "timestamp": "2025-10-01 03:24:56.134051", + "step": 3062, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.181740", + "step": 3062, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006240843329578638, + "timestamp": "2025-10-01 03:24:56.188157", + "step": 3063, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.240757", + "step": 3063, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013169116573408246, + "timestamp": "2025-10-01 03:24:56.268952", + "step": 3064, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.311171", + "step": 3064, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019016595324501395, + "timestamp": "2025-10-01 03:24:56.316822", + "step": 3065, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.364445", + "step": 3065, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006573767401278019, + "timestamp": "2025-10-01 03:24:56.368766", + "step": 3066, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.418303", + "step": 3066, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008892195299267769, + "timestamp": "2025-10-01 03:24:56.423098", + "step": 3067, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.459894", + "step": 3067, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040535975247621536, + "timestamp": "2025-10-01 03:24:56.486348", + "step": 3068, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.524002", + "step": 3068, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036119879223406315, + "timestamp": "2025-10-01 03:24:56.531349", + "step": 3069, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.591968", + "step": 3069, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004063001833856106, + "timestamp": "2025-10-01 03:24:56.600352", + "step": 3070, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.636896", + "step": 3070, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028936192393302917, + "timestamp": "2025-10-01 03:24:56.643149", + "step": 3071, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.684546", + "step": 3071, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006156697869300842, + "timestamp": "2025-10-01 03:24:56.713118", + "step": 3072, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.752150", + "step": 3072, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045119828428141773, + "timestamp": "2025-10-01 03:24:56.759803", + "step": 3073, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.805260", + "step": 3073, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06375899165868759, + "timestamp": "2025-10-01 03:24:56.815569", + "step": 3074, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:56.856323", + "step": 3074, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021600532345473766, + "timestamp": "2025-10-01 03:24:56.864586", + "step": 3075, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.901534", + "step": 3075, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013738697161898017, + "timestamp": "2025-10-01 03:24:56.935242", + "step": 3076, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:56.987301", + "step": 3076, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014947867020964622, + "timestamp": "2025-10-01 03:24:56.996495", + "step": 3077, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.075386", + "step": 3077, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002822326961904764, + "timestamp": "2025-10-01 03:24:57.086532", + "step": 3078, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.134008", + "step": 3078, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021224652882665396, + "timestamp": "2025-10-01 03:24:57.143813", + "step": 3079, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.180904", + "step": 3079, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010665594600141048, + "timestamp": "2025-10-01 03:24:57.212852", + "step": 3080, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.283304", + "step": 3080, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.043773915618658066, + "timestamp": "2025-10-01 03:24:57.287388", + "step": 3081, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:57.327930", + "step": 3081, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028561921790242195, + "timestamp": "2025-10-01 03:24:57.338115", + "step": 3082, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.392186", + "step": 3082, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03091389313340187, + "timestamp": "2025-10-01 03:24:57.401331", + "step": 3083, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.451330", + "step": 3083, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0194419976323843, + "timestamp": "2025-10-01 03:24:57.482802", + "step": 3084, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:57.522880", + "step": 3084, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021966781932860613, + "timestamp": "2025-10-01 03:24:57.534011", + "step": 3085, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.586612", + "step": 3085, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031440798193216324, + "timestamp": "2025-10-01 03:24:57.593284", + "step": 3086, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.644934", + "step": 3086, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027496477123349905, + "timestamp": "2025-10-01 03:24:57.652765", + "step": 3087, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:57.690066", + "step": 3087, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010415061842650175, + "timestamp": "2025-10-01 03:24:57.714523", + "step": 3088, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.770935", + "step": 3088, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0069958907552063465, + "timestamp": "2025-10-01 03:24:57.777853", + "step": 3089, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.827511", + "step": 3089, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04576292261481285, + "timestamp": "2025-10-01 03:24:57.835653", + "step": 3090, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.897754", + "step": 3090, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034737526439130306, + "timestamp": "2025-10-01 03:24:57.904966", + "step": 3091, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:57.953282", + "step": 3091, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015425324440002441, + "timestamp": "2025-10-01 03:24:57.982316", + "step": 3092, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.022193", + "step": 3092, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008311758749186993, + "timestamp": "2025-10-01 03:24:58.030376", + "step": 3093, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:58.070001", + "step": 3093, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010228081606328487, + "timestamp": "2025-10-01 03:24:58.078692", + "step": 3094, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.123682", + "step": 3094, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00660517206415534, + "timestamp": "2025-10-01 03:24:58.133605", + "step": 3095, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.172431", + "step": 3095, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009818763472139835, + "timestamp": "2025-10-01 03:24:58.202872", + "step": 3096, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.242579", + "step": 3096, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011790439486503601, + "timestamp": "2025-10-01 03:24:58.251785", + "step": 3097, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.300208", + "step": 3097, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028569472953677177, + "timestamp": "2025-10-01 03:24:58.310447", + "step": 3098, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.360791", + "step": 3098, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006462990422733128, + "timestamp": "2025-10-01 03:24:58.370543", + "step": 3099, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.427313", + "step": 3099, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00229777954518795, + "timestamp": "2025-10-01 03:24:58.459279", + "step": 3100, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.500869", + "step": 3100, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008419430814683437, + "timestamp": "2025-10-01 03:24:58.509422", + "step": 3101, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:58.559736", + "step": 3101, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.043936166912317276, + "timestamp": "2025-10-01 03:24:58.563623", + "step": 3102, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.606038", + "step": 3102, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05666399374604225, + "timestamp": "2025-10-01 03:24:58.616974", + "step": 3103, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:24:58.687289", + "step": 3103, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00434046471491456, + "timestamp": "2025-10-01 03:24:58.717237", + "step": 3104, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.766170", + "step": 3104, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02597210183739662, + "timestamp": "2025-10-01 03:24:58.778007", + "step": 3105, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.819321", + "step": 3105, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027414047508500516, + "timestamp": "2025-10-01 03:24:58.826795", + "step": 3106, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.875337", + "step": 3106, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014137340476736426, + "timestamp": "2025-10-01 03:24:58.883264", + "step": 3107, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.930230", + "step": 3107, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0060173459351062775, + "timestamp": "2025-10-01 03:24:58.959816", + "step": 3108, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:58.999424", + "step": 3108, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013781826011836529, + "timestamp": "2025-10-01 03:24:59.007573", + "step": 3109, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.058965", + "step": 3109, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03725334629416466, + "timestamp": "2025-10-01 03:24:59.067570", + "step": 3110, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.107524", + "step": 3110, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015755448257550597, + "timestamp": "2025-10-01 03:24:59.118140", + "step": 3111, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:24:59.163396", + "step": 3111, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006482364144176245, + "timestamp": "2025-10-01 03:24:59.193472", + "step": 3112, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.233022", + "step": 3112, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02490421198308468, + "timestamp": "2025-10-01 03:24:59.240998", + "step": 3113, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.279743", + "step": 3113, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00433316407725215, + "timestamp": "2025-10-01 03:24:59.286920", + "step": 3114, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.325042", + "step": 3114, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01547976303845644, + "timestamp": "2025-10-01 03:24:59.335148", + "step": 3115, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.387122", + "step": 3115, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005793449003249407, + "timestamp": "2025-10-01 03:24:59.415649", + "step": 3116, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.455805", + "step": 3116, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023638142738491297, + "timestamp": "2025-10-01 03:24:59.464273", + "step": 3117, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.517268", + "step": 3117, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011786287650465965, + "timestamp": "2025-10-01 03:24:59.527869", + "step": 3118, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.581399", + "step": 3118, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008116266108117998, + "timestamp": "2025-10-01 03:24:59.590189", + "step": 3119, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.629597", + "step": 3119, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030240449123084545, + "timestamp": "2025-10-01 03:24:59.660839", + "step": 3120, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.727786", + "step": 3120, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010476933093741536, + "timestamp": "2025-10-01 03:24:59.736966", + "step": 3121, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.794601", + "step": 3121, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023849381133913994, + "timestamp": "2025-10-01 03:24:59.803831", + "step": 3122, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.856524", + "step": 3122, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006496456917375326, + "timestamp": "2025-10-01 03:24:59.861662", + "step": 3123, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:24:59.931637", + "step": 3123, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00423115398734808, + "timestamp": "2025-10-01 03:24:59.961180", + "step": 3124, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.007613", + "step": 3124, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026077006477862597, + "timestamp": "2025-10-01 03:25:00.015109", + "step": 3125, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.086484", + "step": 3125, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017952093854546547, + "timestamp": "2025-10-01 03:25:00.091769", + "step": 3126, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.142255", + "step": 3126, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04112892225384712, + "timestamp": "2025-10-01 03:25:00.151434", + "step": 3127, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.213183", + "step": 3127, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041600294411182404, + "timestamp": "2025-10-01 03:25:00.243707", + "step": 3128, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.289238", + "step": 3128, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01369091309607029, + "timestamp": "2025-10-01 03:25:00.296139", + "step": 3129, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:00.383164", + "step": 3129, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012993245385587215, + "timestamp": "2025-10-01 03:25:00.392145", + "step": 3130, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.454587", + "step": 3130, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013450332917273045, + "timestamp": "2025-10-01 03:25:00.457887", + "step": 3131, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.511009", + "step": 3131, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007406151853501797, + "timestamp": "2025-10-01 03:25:00.540741", + "step": 3132, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.598610", + "step": 3132, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028756449464708567, + "timestamp": "2025-10-01 03:25:00.608751", + "step": 3133, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.654232", + "step": 3133, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010317564010620117, + "timestamp": "2025-10-01 03:25:00.664234", + "step": 3134, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.750954", + "step": 3134, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02919425070285797, + "timestamp": "2025-10-01 03:25:00.761937", + "step": 3135, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.829504", + "step": 3135, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002552751451730728, + "timestamp": "2025-10-01 03:25:00.856909", + "step": 3136, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:00.922376", + "step": 3136, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007680612150579691, + "timestamp": "2025-10-01 03:25:00.935424", + "step": 3137, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:00.975554", + "step": 3137, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02397662214934826, + "timestamp": "2025-10-01 03:25:00.979990", + "step": 3138, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:01.029295", + "step": 3138, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001146638416685164, + "timestamp": "2025-10-01 03:25:01.041008", + "step": 3139, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:01.080545", + "step": 3139, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010633069090545177, + "timestamp": "2025-10-01 03:25:01.112741", + "step": 3140, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.150218", + "step": 3140, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013545032124966383, + "timestamp": "2025-10-01 03:25:01.159688", + "step": 3141, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.197598", + "step": 3141, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034734655637294054, + "timestamp": "2025-10-01 03:25:01.207955", + "step": 3142, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.260893", + "step": 3142, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007943113334476948, + "timestamp": "2025-10-01 03:25:01.272575", + "step": 3143, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.314351", + "step": 3143, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015658032149076462, + "timestamp": "2025-10-01 03:25:01.346101", + "step": 3144, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:01.388589", + "step": 3144, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004173672758042812, + "timestamp": "2025-10-01 03:25:01.398823", + "step": 3145, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.436914", + "step": 3145, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004275917075574398, + "timestamp": "2025-10-01 03:25:01.447080", + "step": 3146, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.506210", + "step": 3146, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009221597574651241, + "timestamp": "2025-10-01 03:25:01.509380", + "step": 3147, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.563061", + "step": 3147, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004014058969914913, + "timestamp": "2025-10-01 03:25:01.594909", + "step": 3148, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.632653", + "step": 3148, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01492088008671999, + "timestamp": "2025-10-01 03:25:01.645025", + "step": 3149, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.688202", + "step": 3149, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020151542499661446, + "timestamp": "2025-10-01 03:25:01.692344", + "step": 3150, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:01.735691", + "step": 3150, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027011376805603504, + "timestamp": "2025-10-01 03:25:01.744497", + "step": 3151, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.783683", + "step": 3151, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019118266645818949, + "timestamp": "2025-10-01 03:25:01.813522", + "step": 3152, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.851523", + "step": 3152, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03144407644867897, + "timestamp": "2025-10-01 03:25:01.860759", + "step": 3153, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.914597", + "step": 3153, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030637127347290516, + "timestamp": "2025-10-01 03:25:01.925169", + "step": 3154, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:01.966614", + "step": 3154, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006981182959862053, + "timestamp": "2025-10-01 03:25:01.976133", + "step": 3155, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:02.019214", + "step": 3155, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008347787894308567, + "timestamp": "2025-10-01 03:25:02.051623", + "step": 3156, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:02.090653", + "step": 3156, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008302517235279083, + "timestamp": "2025-10-01 03:25:02.097178", + "step": 3157, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.136901", + "step": 3157, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004930675029754639, + "timestamp": "2025-10-01 03:25:02.145682", + "step": 3158, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:02.183010", + "step": 3158, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015059016644954681, + "timestamp": "2025-10-01 03:25:02.190047", + "step": 3159, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.228226", + "step": 3159, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004658237565308809, + "timestamp": "2025-10-01 03:25:02.260319", + "step": 3160, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.302256", + "step": 3160, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000425753794843331, + "timestamp": "2025-10-01 03:25:02.312142", + "step": 3161, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.351698", + "step": 3161, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020568682812154293, + "timestamp": "2025-10-01 03:25:02.366658", + "step": 3162, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.417094", + "step": 3162, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003896774258464575, + "timestamp": "2025-10-01 03:25:02.425073", + "step": 3163, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.465806", + "step": 3163, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003519330406561494, + "timestamp": "2025-10-01 03:25:02.498962", + "step": 3164, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.553998", + "step": 3164, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040104009211063385, + "timestamp": "2025-10-01 03:25:02.560391", + "step": 3165, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.607173", + "step": 3165, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00552584882825613, + "timestamp": "2025-10-01 03:25:02.614391", + "step": 3166, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.655280", + "step": 3166, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003695692284964025, + "timestamp": "2025-10-01 03:25:02.662330", + "step": 3167, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.701703", + "step": 3167, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025732485577464104, + "timestamp": "2025-10-01 03:25:02.732589", + "step": 3168, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.775476", + "step": 3168, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04758130759000778, + "timestamp": "2025-10-01 03:25:02.784896", + "step": 3169, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.830808", + "step": 3169, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014595004031434655, + "timestamp": "2025-10-01 03:25:02.839246", + "step": 3170, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.895787", + "step": 3170, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002775374101474881, + "timestamp": "2025-10-01 03:25:02.903344", + "step": 3171, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:02.942005", + "step": 3171, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014680930180475116, + "timestamp": "2025-10-01 03:25:02.972439", + "step": 3172, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.012872", + "step": 3172, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015091783367097378, + "timestamp": "2025-10-01 03:25:03.021054", + "step": 3173, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:03.058292", + "step": 3173, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006103575229644775, + "timestamp": "2025-10-01 03:25:03.070386", + "step": 3174, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.120780", + "step": 3174, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011374856112524867, + "timestamp": "2025-10-01 03:25:03.128112", + "step": 3175, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.170791", + "step": 3175, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006010821089148521, + "timestamp": "2025-10-01 03:25:03.196012", + "step": 3176, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:03.233570", + "step": 3176, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011463311966508627, + "timestamp": "2025-10-01 03:25:03.242811", + "step": 3177, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.284533", + "step": 3177, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00812136847525835, + "timestamp": "2025-10-01 03:25:03.289168", + "step": 3178, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.334157", + "step": 3178, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006228397833183408, + "timestamp": "2025-10-01 03:25:03.343555", + "step": 3179, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.386209", + "step": 3179, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04722919687628746, + "timestamp": "2025-10-01 03:25:03.418425", + "step": 3180, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.457510", + "step": 3180, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0592794306576252, + "timestamp": "2025-10-01 03:25:03.467014", + "step": 3181, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.521665", + "step": 3181, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003795678203459829, + "timestamp": "2025-10-01 03:25:03.532674", + "step": 3182, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.590295", + "step": 3182, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026979739777743816, + "timestamp": "2025-10-01 03:25:03.600416", + "step": 3183, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.640684", + "step": 3183, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06292461603879929, + "timestamp": "2025-10-01 03:25:03.670059", + "step": 3184, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.723185", + "step": 3184, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034575399477034807, + "timestamp": "2025-10-01 03:25:03.733743", + "step": 3185, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:03.792675", + "step": 3185, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008651304058730602, + "timestamp": "2025-10-01 03:25:03.802477", + "step": 3186, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.853096", + "step": 3186, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.054369065910577774, + "timestamp": "2025-10-01 03:25:03.862512", + "step": 3187, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.908221", + "step": 3187, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012312197359278798, + "timestamp": "2025-10-01 03:25:03.940227", + "step": 3188, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:03.987308", + "step": 3188, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00034907247754745185, + "timestamp": "2025-10-01 03:25:03.998147", + "step": 3189, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:04.037666", + "step": 3189, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006248199846595526, + "timestamp": "2025-10-01 03:25:04.046499", + "step": 3190, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:04.080934", + "step": 3190, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020546048879623413, + "timestamp": "2025-10-01 03:25:04.090255", + "step": 3191, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:04.129868", + "step": 3191, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0065701850689947605, + "timestamp": "2025-10-01 03:25:04.160628", + "step": 3192, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:25:07.058551", + "step": 3192, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2251497.691130241, + "timestamp": "2025-10-01 03:25:07.065637", + "step": 3192, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.100668", + "step": 3192, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010494111338630319, + "timestamp": "2025-10-01 03:25:07.108082", + "step": 3193, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.143869", + "step": 3193, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04635751619935036, + "timestamp": "2025-10-01 03:25:07.150890", + "step": 3194, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.185739", + "step": 3194, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003807122993748635, + "timestamp": "2025-10-01 03:25:07.192247", + "step": 3195, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.240037", + "step": 3195, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027106210589408875, + "timestamp": "2025-10-01 03:25:07.268055", + "step": 3196, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.308087", + "step": 3196, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008563270675949752, + "timestamp": "2025-10-01 03:25:07.316485", + "step": 3197, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.360417", + "step": 3197, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002351598348468542, + "timestamp": "2025-10-01 03:25:07.369324", + "step": 3198, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:07.408747", + "step": 3198, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01610124483704567, + "timestamp": "2025-10-01 03:25:07.414387", + "step": 3199, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.463290", + "step": 3199, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013642637059092522, + "timestamp": "2025-10-01 03:25:07.493046", + "step": 3200, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.531807", + "step": 3200, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005350664258003235, + "timestamp": "2025-10-01 03:25:07.540152", + "step": 3201, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.587826", + "step": 3201, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031008445657789707, + "timestamp": "2025-10-01 03:25:07.595944", + "step": 3202, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.643480", + "step": 3202, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006861585192382336, + "timestamp": "2025-10-01 03:25:07.653453", + "step": 3203, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.695033", + "step": 3203, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019323011860251427, + "timestamp": "2025-10-01 03:25:07.725751", + "step": 3204, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.766147", + "step": 3204, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005505076609551907, + "timestamp": "2025-10-01 03:25:07.777085", + "step": 3205, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.815886", + "step": 3205, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006792313884943724, + "timestamp": "2025-10-01 03:25:07.824177", + "step": 3206, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.864255", + "step": 3206, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00103358319029212, + "timestamp": "2025-10-01 03:25:07.871708", + "step": 3207, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.910178", + "step": 3207, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007812761701643467, + "timestamp": "2025-10-01 03:25:07.938928", + "step": 3208, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:07.985165", + "step": 3208, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004798748530447483, + "timestamp": "2025-10-01 03:25:07.992859", + "step": 3209, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.031231", + "step": 3209, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015006862580776215, + "timestamp": "2025-10-01 03:25:08.038857", + "step": 3210, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.081567", + "step": 3210, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027166164363734424, + "timestamp": "2025-10-01 03:25:08.089475", + "step": 3211, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.139728", + "step": 3211, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003981855232268572, + "timestamp": "2025-10-01 03:25:08.169626", + "step": 3212, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.206213", + "step": 3212, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037291590124368668, + "timestamp": "2025-10-01 03:25:08.214142", + "step": 3213, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:08.254301", + "step": 3213, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010149207897484303, + "timestamp": "2025-10-01 03:25:08.264415", + "step": 3214, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.303892", + "step": 3214, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05998668074607849, + "timestamp": "2025-10-01 03:25:08.307911", + "step": 3215, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.350120", + "step": 3215, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.049220409244298935, + "timestamp": "2025-10-01 03:25:08.379434", + "step": 3216, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.415895", + "step": 3216, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020737087354063988, + "timestamp": "2025-10-01 03:25:08.419296", + "step": 3217, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:08.458266", + "step": 3217, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023045334964990616, + "timestamp": "2025-10-01 03:25:08.466526", + "step": 3218, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.502612", + "step": 3218, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006698797922581434, + "timestamp": "2025-10-01 03:25:08.510457", + "step": 3219, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.556801", + "step": 3219, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013558960054069757, + "timestamp": "2025-10-01 03:25:08.589700", + "step": 3220, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.631280", + "step": 3220, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001688324729911983, + "timestamp": "2025-10-01 03:25:08.640634", + "step": 3221, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.683205", + "step": 3221, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0072437855415046215, + "timestamp": "2025-10-01 03:25:08.694154", + "step": 3222, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.733724", + "step": 3222, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008391639217734337, + "timestamp": "2025-10-01 03:25:08.742919", + "step": 3223, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.780572", + "step": 3223, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03736274316906929, + "timestamp": "2025-10-01 03:25:08.811889", + "step": 3224, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.853909", + "step": 3224, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029487257823348045, + "timestamp": "2025-10-01 03:25:08.860017", + "step": 3225, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.902838", + "step": 3225, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001475482014939189, + "timestamp": "2025-10-01 03:25:08.909748", + "step": 3226, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:08.951564", + "step": 3226, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03331061452627182, + "timestamp": "2025-10-01 03:25:08.959907", + "step": 3227, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:08.997444", + "step": 3227, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028554698452353477, + "timestamp": "2025-10-01 03:25:09.025364", + "step": 3228, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:09.077857", + "step": 3228, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005760558880865574, + "timestamp": "2025-10-01 03:25:09.085750", + "step": 3229, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.131870", + "step": 3229, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02775239571928978, + "timestamp": "2025-10-01 03:25:09.139499", + "step": 3230, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.177230", + "step": 3230, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004155817907303572, + "timestamp": "2025-10-01 03:25:09.186272", + "step": 3231, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.231058", + "step": 3231, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021670705173164606, + "timestamp": "2025-10-01 03:25:09.263441", + "step": 3232, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:09.310053", + "step": 3232, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008345721289515495, + "timestamp": "2025-10-01 03:25:09.320979", + "step": 3233, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.367349", + "step": 3233, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02698822133243084, + "timestamp": "2025-10-01 03:25:09.373395", + "step": 3234, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:09.417201", + "step": 3234, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004532273858785629, + "timestamp": "2025-10-01 03:25:09.424198", + "step": 3235, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.462536", + "step": 3235, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019751007203012705, + "timestamp": "2025-10-01 03:25:09.487797", + "step": 3236, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.531167", + "step": 3236, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001980219269171357, + "timestamp": "2025-10-01 03:25:09.538197", + "step": 3237, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.578994", + "step": 3237, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009545587236061692, + "timestamp": "2025-10-01 03:25:09.587592", + "step": 3238, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:09.631010", + "step": 3238, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006521126721054316, + "timestamp": "2025-10-01 03:25:09.640169", + "step": 3239, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.678494", + "step": 3239, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.060817357152700424, + "timestamp": "2025-10-01 03:25:09.708882", + "step": 3240, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.751476", + "step": 3240, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016424155328422785, + "timestamp": "2025-10-01 03:25:09.754019", + "step": 3241, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:09.794323", + "step": 3241, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021546657662838697, + "timestamp": "2025-10-01 03:25:09.799677", + "step": 3242, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.837771", + "step": 3242, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01361051294952631, + "timestamp": "2025-10-01 03:25:09.842175", + "step": 3243, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.877266", + "step": 3243, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0399436354637146, + "timestamp": "2025-10-01 03:25:09.903966", + "step": 3244, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:09.943833", + "step": 3244, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028417544439435005, + "timestamp": "2025-10-01 03:25:09.953229", + "step": 3245, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.016308", + "step": 3245, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016209067543968558, + "timestamp": "2025-10-01 03:25:10.021614", + "step": 3246, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.055144", + "step": 3246, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000943609222304076, + "timestamp": "2025-10-01 03:25:10.058499", + "step": 3247, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.091626", + "step": 3247, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022393863648176193, + "timestamp": "2025-10-01 03:25:10.118768", + "step": 3248, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.156785", + "step": 3248, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006012738682329655, + "timestamp": "2025-10-01 03:25:10.160920", + "step": 3249, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.197990", + "step": 3249, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033384752459824085, + "timestamp": "2025-10-01 03:25:10.205437", + "step": 3250, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.251440", + "step": 3250, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003660967806354165, + "timestamp": "2025-10-01 03:25:10.257404", + "step": 3251, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:10.294868", + "step": 3251, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002742575481534004, + "timestamp": "2025-10-01 03:25:10.319200", + "step": 3252, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.355926", + "step": 3252, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03091638907790184, + "timestamp": "2025-10-01 03:25:10.358293", + "step": 3253, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.392008", + "step": 3253, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00466503668576479, + "timestamp": "2025-10-01 03:25:10.397580", + "step": 3254, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.447255", + "step": 3254, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016337569104507565, + "timestamp": "2025-10-01 03:25:10.453726", + "step": 3255, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.499072", + "step": 3255, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003814792027696967, + "timestamp": "2025-10-01 03:25:10.524448", + "step": 3256, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.557610", + "step": 3256, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031677167862653732, + "timestamp": "2025-10-01 03:25:10.565535", + "step": 3257, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:10.601332", + "step": 3257, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007219274993985891, + "timestamp": "2025-10-01 03:25:10.605830", + "step": 3258, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.646864", + "step": 3258, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005386847187764943, + "timestamp": "2025-10-01 03:25:10.649890", + "step": 3259, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.686664", + "step": 3259, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005017862655222416, + "timestamp": "2025-10-01 03:25:10.714309", + "step": 3260, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.747123", + "step": 3260, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022104065865278244, + "timestamp": "2025-10-01 03:25:10.749616", + "step": 3261, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:10.784015", + "step": 3261, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00936212670058012, + "timestamp": "2025-10-01 03:25:10.786569", + "step": 3262, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.819775", + "step": 3262, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013445605523884296, + "timestamp": "2025-10-01 03:25:10.821998", + "step": 3263, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:10.854437", + "step": 3263, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001190994051285088, + "timestamp": "2025-10-01 03:25:10.878405", + "step": 3264, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:10.915805", + "step": 3264, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014460199512541294, + "timestamp": "2025-10-01 03:25:10.921055", + "step": 3265, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.956719", + "step": 3265, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032500527799129486, + "timestamp": "2025-10-01 03:25:10.959600", + "step": 3266, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:10.992263", + "step": 3266, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022290829569101334, + "timestamp": "2025-10-01 03:25:10.995360", + "step": 3267, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:11.033147", + "step": 3267, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025281844660639763, + "timestamp": "2025-10-01 03:25:11.057589", + "step": 3268, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.099219", + "step": 3268, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03697364777326584, + "timestamp": "2025-10-01 03:25:11.101557", + "step": 3269, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.132998", + "step": 3269, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05658971518278122, + "timestamp": "2025-10-01 03:25:11.136231", + "step": 3270, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.167659", + "step": 3270, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004568137694150209, + "timestamp": "2025-10-01 03:25:11.169748", + "step": 3271, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.206164", + "step": 3271, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004929234739392996, + "timestamp": "2025-10-01 03:25:11.234906", + "step": 3272, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.271526", + "step": 3272, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009538357146084309, + "timestamp": "2025-10-01 03:25:11.273866", + "step": 3273, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.305835", + "step": 3273, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004205197561532259, + "timestamp": "2025-10-01 03:25:11.308350", + "step": 3274, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:11.340438", + "step": 3274, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001234338735230267, + "timestamp": "2025-10-01 03:25:11.343649", + "step": 3275, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.385589", + "step": 3275, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011669367551803589, + "timestamp": "2025-10-01 03:25:11.409369", + "step": 3276, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.445129", + "step": 3276, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007324386853724718, + "timestamp": "2025-10-01 03:25:11.447999", + "step": 3277, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.481103", + "step": 3277, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013648763997480273, + "timestamp": "2025-10-01 03:25:11.486050", + "step": 3278, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.526367", + "step": 3278, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002651963382959366, + "timestamp": "2025-10-01 03:25:11.529548", + "step": 3279, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.568796", + "step": 3279, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005999593995511532, + "timestamp": "2025-10-01 03:25:11.593059", + "step": 3280, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:25:11.632769", + "step": 3280, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001546406769193709, + "timestamp": "2025-10-01 03:25:11.636428", + "step": 3281, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.674894", + "step": 3281, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00179336778819561, + "timestamp": "2025-10-01 03:25:11.680525", + "step": 3282, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.721847", + "step": 3282, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05020619556307793, + "timestamp": "2025-10-01 03:25:11.723663", + "step": 3283, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.757294", + "step": 3283, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00318012828938663, + "timestamp": "2025-10-01 03:25:11.781005", + "step": 3284, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.811476", + "step": 3284, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005362950265407562, + "timestamp": "2025-10-01 03:25:11.813390", + "step": 3285, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.846529", + "step": 3285, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01174105517566204, + "timestamp": "2025-10-01 03:25:11.849051", + "step": 3286, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.880660", + "step": 3286, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004179969895631075, + "timestamp": "2025-10-01 03:25:11.886079", + "step": 3287, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:11.924502", + "step": 3287, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002828664379194379, + "timestamp": "2025-10-01 03:25:11.951306", + "step": 3288, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:11.984270", + "step": 3288, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002730712993070483, + "timestamp": "2025-10-01 03:25:11.987304", + "step": 3289, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.024235", + "step": 3289, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002051942516118288, + "timestamp": "2025-10-01 03:25:12.027792", + "step": 3290, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.062041", + "step": 3290, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018878262490034103, + "timestamp": "2025-10-01 03:25:12.064424", + "step": 3291, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.096089", + "step": 3291, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001770924893207848, + "timestamp": "2025-10-01 03:25:12.120431", + "step": 3292, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:12.153072", + "step": 3292, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005173434037715197, + "timestamp": "2025-10-01 03:25:12.157645", + "step": 3293, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.190917", + "step": 3293, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.043547336012125015, + "timestamp": "2025-10-01 03:25:12.193467", + "step": 3294, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.225668", + "step": 3294, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01968318782746792, + "timestamp": "2025-10-01 03:25:12.229880", + "step": 3295, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:12.270976", + "step": 3295, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032817739993333817, + "timestamp": "2025-10-01 03:25:12.296834", + "step": 3296, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.329331", + "step": 3296, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008139544166624546, + "timestamp": "2025-10-01 03:25:12.331660", + "step": 3297, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.362961", + "step": 3297, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003488200483843684, + "timestamp": "2025-10-01 03:25:12.366921", + "step": 3298, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:12.400185", + "step": 3298, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009402923285961151, + "timestamp": "2025-10-01 03:25:12.403690", + "step": 3299, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.435388", + "step": 3299, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004712823312729597, + "timestamp": "2025-10-01 03:25:12.459076", + "step": 3300, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.490801", + "step": 3300, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012230759486556053, + "timestamp": "2025-10-01 03:25:12.493085", + "step": 3301, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.528786", + "step": 3301, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023459827061742544, + "timestamp": "2025-10-01 03:25:12.531297", + "step": 3302, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.565277", + "step": 3302, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002151545835658908, + "timestamp": "2025-10-01 03:25:12.570967", + "step": 3303, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.606518", + "step": 3303, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002779453992843628, + "timestamp": "2025-10-01 03:25:12.633177", + "step": 3304, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.670751", + "step": 3304, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02932431735098362, + "timestamp": "2025-10-01 03:25:12.676775", + "step": 3305, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.712805", + "step": 3305, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006803030613809824, + "timestamp": "2025-10-01 03:25:12.719810", + "step": 3306, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.756322", + "step": 3306, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003398351836949587, + "timestamp": "2025-10-01 03:25:12.761768", + "step": 3307, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.798312", + "step": 3307, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010517358779907227, + "timestamp": "2025-10-01 03:25:12.823248", + "step": 3308, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:12.857382", + "step": 3308, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04841390997171402, + "timestamp": "2025-10-01 03:25:12.863610", + "step": 3309, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.901185", + "step": 3309, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002887240843847394, + "timestamp": "2025-10-01 03:25:12.903474", + "step": 3310, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.934256", + "step": 3310, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007706192787736654, + "timestamp": "2025-10-01 03:25:12.936702", + "step": 3311, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:12.968804", + "step": 3311, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014397038612514734, + "timestamp": "2025-10-01 03:25:12.992798", + "step": 3312, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:13.028232", + "step": 3312, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05341706424951553, + "timestamp": "2025-10-01 03:25:13.033034", + "step": 3313, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.069927", + "step": 3313, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005345758399926126, + "timestamp": "2025-10-01 03:25:13.076429", + "step": 3314, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.113750", + "step": 3314, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037821216974407434, + "timestamp": "2025-10-01 03:25:13.116416", + "step": 3315, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.150222", + "step": 3315, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019016077741980553, + "timestamp": "2025-10-01 03:25:13.174471", + "step": 3316, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.206865", + "step": 3316, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0064894356764853, + "timestamp": "2025-10-01 03:25:13.209899", + "step": 3317, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.243535", + "step": 3317, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00042673497227951884, + "timestamp": "2025-10-01 03:25:13.245700", + "step": 3318, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.276829", + "step": 3318, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016489967238157988, + "timestamp": "2025-10-01 03:25:13.279353", + "step": 3319, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.311437", + "step": 3319, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005785963148809969, + "timestamp": "2025-10-01 03:25:13.335369", + "step": 3320, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.368406", + "step": 3320, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001857675495557487, + "timestamp": "2025-10-01 03:25:13.373074", + "step": 3321, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.411530", + "step": 3321, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003452996024861932, + "timestamp": "2025-10-01 03:25:13.418935", + "step": 3322, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.452166", + "step": 3322, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008445910178124905, + "timestamp": "2025-10-01 03:25:13.455346", + "step": 3323, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.488753", + "step": 3323, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07203558832406998, + "timestamp": "2025-10-01 03:25:13.513651", + "step": 3324, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.547214", + "step": 3324, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04663626477122307, + "timestamp": "2025-10-01 03:25:13.549529", + "step": 3325, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.582425", + "step": 3325, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001130128395743668, + "timestamp": "2025-10-01 03:25:13.584954", + "step": 3326, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-10-01 03:25:13.619018", + "step": 3326, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019856046885252, + "timestamp": "2025-10-01 03:25:13.621443", + "step": 3327, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.654345", + "step": 3327, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013760353904217482, + "timestamp": "2025-10-01 03:25:13.680748", + "step": 3328, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.720334", + "step": 3328, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005302447825670242, + "timestamp": "2025-10-01 03:25:13.730136", + "step": 3329, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.770534", + "step": 3329, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041977737098932266, + "timestamp": "2025-10-01 03:25:13.775195", + "step": 3330, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.809017", + "step": 3330, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08014635741710663, + "timestamp": "2025-10-01 03:25:13.813523", + "step": 3331, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.847013", + "step": 3331, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01323405560106039, + "timestamp": "2025-10-01 03:25:13.871520", + "step": 3332, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.903616", + "step": 3332, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001433524303138256, + "timestamp": "2025-10-01 03:25:13.907314", + "step": 3333, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:13.939811", + "step": 3333, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04396507889032364, + "timestamp": "2025-10-01 03:25:13.942414", + "step": 3334, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:13.974677", + "step": 3334, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06106859818100929, + "timestamp": "2025-10-01 03:25:13.979123", + "step": 3335, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.012194", + "step": 3335, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012024493888020515, + "timestamp": "2025-10-01 03:25:14.038294", + "step": 3336, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.075892", + "step": 3336, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010812634136527777, + "timestamp": "2025-10-01 03:25:14.080247", + "step": 3337, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.119399", + "step": 3337, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00040871388046070933, + "timestamp": "2025-10-01 03:25:14.122854", + "step": 3338, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.156462", + "step": 3338, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004583470872603357, + "timestamp": "2025-10-01 03:25:14.159324", + "step": 3339, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.190584", + "step": 3339, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020728406962007284, + "timestamp": "2025-10-01 03:25:14.214652", + "step": 3340, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.247688", + "step": 3340, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010753754759207368, + "timestamp": "2025-10-01 03:25:14.250720", + "step": 3341, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.285240", + "step": 3341, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014183297753334045, + "timestamp": "2025-10-01 03:25:14.287754", + "step": 3342, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.322527", + "step": 3342, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017137732356786728, + "timestamp": "2025-10-01 03:25:14.324943", + "step": 3343, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:14.356601", + "step": 3343, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025604539550840855, + "timestamp": "2025-10-01 03:25:14.383061", + "step": 3344, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:25:16.614303", + "step": 3344, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2602008.89518411, + "timestamp": "2025-10-01 03:25:16.618037", + "step": 3344, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:16.648412", + "step": 3344, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030295494943857193, + "timestamp": "2025-10-01 03:25:16.651874", + "step": 3345, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:16.685903", + "step": 3345, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005261518061161041, + "timestamp": "2025-10-01 03:25:16.689256", + "step": 3346, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:16.720980", + "step": 3346, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00489491131156683, + "timestamp": "2025-10-01 03:25:16.723997", + "step": 3347, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:16.755865", + "step": 3347, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01797039434313774, + "timestamp": "2025-10-01 03:25:16.779923", + "step": 3348, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:16.810850", + "step": 3348, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010059533640742302, + "timestamp": "2025-10-01 03:25:16.815246", + "step": 3349, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:16.848181", + "step": 3349, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029392687138170004, + "timestamp": "2025-10-01 03:25:16.854689", + "step": 3350, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:16.895354", + "step": 3350, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010853076819330454, + "timestamp": "2025-10-01 03:25:16.900716", + "step": 3351, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:16.936300", + "step": 3351, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03288063779473305, + "timestamp": "2025-10-01 03:25:16.961646", + "step": 3352, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:16.999831", + "step": 3352, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033975113183259964, + "timestamp": "2025-10-01 03:25:17.002429", + "step": 3353, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.035733", + "step": 3353, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03250426799058914, + "timestamp": "2025-10-01 03:25:17.039255", + "step": 3354, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.071302", + "step": 3354, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00032783643109723926, + "timestamp": "2025-10-01 03:25:17.074971", + "step": 3355, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.116807", + "step": 3355, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002693223301321268, + "timestamp": "2025-10-01 03:25:17.143372", + "step": 3356, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.178085", + "step": 3356, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05166058987379074, + "timestamp": "2025-10-01 03:25:17.183134", + "step": 3357, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.215521", + "step": 3357, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019410208333283663, + "timestamp": "2025-10-01 03:25:17.219694", + "step": 3358, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.251173", + "step": 3358, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028412459418177605, + "timestamp": "2025-10-01 03:25:17.253529", + "step": 3359, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:17.284872", + "step": 3359, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022556581534445286, + "timestamp": "2025-10-01 03:25:17.311807", + "step": 3360, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:17.346940", + "step": 3360, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019876252859830856, + "timestamp": "2025-10-01 03:25:17.350898", + "step": 3361, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.390416", + "step": 3361, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003119922708719969, + "timestamp": "2025-10-01 03:25:17.392902", + "step": 3362, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.425105", + "step": 3362, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010088798590004444, + "timestamp": "2025-10-01 03:25:17.433657", + "step": 3363, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.471973", + "step": 3363, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010723911225795746, + "timestamp": "2025-10-01 03:25:17.496682", + "step": 3364, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.529401", + "step": 3364, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0361340306699276, + "timestamp": "2025-10-01 03:25:17.531628", + "step": 3365, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.562790", + "step": 3365, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034135524183511734, + "timestamp": "2025-10-01 03:25:17.565025", + "step": 3366, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.596847", + "step": 3366, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007921603391878307, + "timestamp": "2025-10-01 03:25:17.599398", + "step": 3367, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.631396", + "step": 3367, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.049092113971710205, + "timestamp": "2025-10-01 03:25:17.662372", + "step": 3368, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.697345", + "step": 3368, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0055750696919858456, + "timestamp": "2025-10-01 03:25:17.703636", + "step": 3369, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.735245", + "step": 3369, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036615978460758924, + "timestamp": "2025-10-01 03:25:17.738727", + "step": 3370, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.771015", + "step": 3370, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.033474940806627274, + "timestamp": "2025-10-01 03:25:17.773319", + "step": 3371, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.808663", + "step": 3371, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014221475459635258, + "timestamp": "2025-10-01 03:25:17.833962", + "step": 3372, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.877922", + "step": 3372, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03370310366153717, + "timestamp": "2025-10-01 03:25:17.883259", + "step": 3373, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.917352", + "step": 3373, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03699374943971634, + "timestamp": "2025-10-01 03:25:17.920944", + "step": 3374, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.952802", + "step": 3374, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01537253987044096, + "timestamp": "2025-10-01 03:25:17.955976", + "step": 3375, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:17.989605", + "step": 3375, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021142691373825073, + "timestamp": "2025-10-01 03:25:18.015178", + "step": 3376, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.048513", + "step": 3376, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0058901021257042885, + "timestamp": "2025-10-01 03:25:18.051749", + "step": 3377, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.085999", + "step": 3377, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025154289323836565, + "timestamp": "2025-10-01 03:25:18.091594", + "step": 3378, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.130478", + "step": 3378, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007987062446773052, + "timestamp": "2025-10-01 03:25:18.133009", + "step": 3379, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.163666", + "step": 3379, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014217027463018894, + "timestamp": "2025-10-01 03:25:18.191022", + "step": 3380, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.236515", + "step": 3380, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003529178211465478, + "timestamp": "2025-10-01 03:25:18.240017", + "step": 3381, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:18.271581", + "step": 3381, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002111425856128335, + "timestamp": "2025-10-01 03:25:18.274177", + "step": 3382, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:18.305139", + "step": 3382, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016905304044485092, + "timestamp": "2025-10-01 03:25:18.308121", + "step": 3383, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:18.340997", + "step": 3383, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008216963149607182, + "timestamp": "2025-10-01 03:25:18.365453", + "step": 3384, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:18.400526", + "step": 3384, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005846628453582525, + "timestamp": "2025-10-01 03:25:18.408123", + "step": 3385, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.443899", + "step": 3385, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022869836539030075, + "timestamp": "2025-10-01 03:25:18.448035", + "step": 3386, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.479841", + "step": 3386, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04855891689658165, + "timestamp": "2025-10-01 03:25:18.486684", + "step": 3387, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:18.517458", + "step": 3387, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006421183701604605, + "timestamp": "2025-10-01 03:25:18.544967", + "step": 3388, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.582141", + "step": 3388, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038406946696341038, + "timestamp": "2025-10-01 03:25:18.584427", + "step": 3389, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.615223", + "step": 3389, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002440615091472864, + "timestamp": "2025-10-01 03:25:18.617554", + "step": 3390, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.648897", + "step": 3390, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018362659960985184, + "timestamp": "2025-10-01 03:25:18.653446", + "step": 3391, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.688382", + "step": 3391, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006790074985474348, + "timestamp": "2025-10-01 03:25:18.713639", + "step": 3392, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.747266", + "step": 3392, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016262579709291458, + "timestamp": "2025-10-01 03:25:18.751156", + "step": 3393, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.782516", + "step": 3393, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012016936670988798, + "timestamp": "2025-10-01 03:25:18.785295", + "step": 3394, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.818838", + "step": 3394, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003718756139278412, + "timestamp": "2025-10-01 03:25:18.821916", + "step": 3395, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.853672", + "step": 3395, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011485490016639233, + "timestamp": "2025-10-01 03:25:18.878148", + "step": 3396, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:25:18.911281", + "step": 3396, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005359877832233906, + "timestamp": "2025-10-01 03:25:18.916029", + "step": 3397, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.952242", + "step": 3397, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038840104825794697, + "timestamp": "2025-10-01 03:25:18.956135", + "step": 3398, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:18.991233", + "step": 3398, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028838643338531256, + "timestamp": "2025-10-01 03:25:18.993548", + "step": 3399, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.027778", + "step": 3399, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011122160591185093, + "timestamp": "2025-10-01 03:25:19.054524", + "step": 3400, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.090472", + "step": 3400, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.033865299075841904, + "timestamp": "2025-10-01 03:25:19.094196", + "step": 3401, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.127866", + "step": 3401, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020030357409268618, + "timestamp": "2025-10-01 03:25:19.130334", + "step": 3402, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:19.162186", + "step": 3402, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018117351457476616, + "timestamp": "2025-10-01 03:25:19.164568", + "step": 3403, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.199907", + "step": 3403, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035751929972320795, + "timestamp": "2025-10-01 03:25:19.224493", + "step": 3404, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.261905", + "step": 3404, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005065847188234329, + "timestamp": "2025-10-01 03:25:19.264128", + "step": 3405, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.295409", + "step": 3405, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004021007567644119, + "timestamp": "2025-10-01 03:25:19.297789", + "step": 3406, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.328557", + "step": 3406, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018424654845148325, + "timestamp": "2025-10-01 03:25:19.330749", + "step": 3407, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:19.361549", + "step": 3407, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022080237977206707, + "timestamp": "2025-10-01 03:25:19.385567", + "step": 3408, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.426276", + "step": 3408, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035632144659757614, + "timestamp": "2025-10-01 03:25:19.432639", + "step": 3409, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.468674", + "step": 3409, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04607214406132698, + "timestamp": "2025-10-01 03:25:19.472936", + "step": 3410, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.511250", + "step": 3410, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030419875402003527, + "timestamp": "2025-10-01 03:25:19.514384", + "step": 3411, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.548836", + "step": 3411, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07343268394470215, + "timestamp": "2025-10-01 03:25:19.573957", + "step": 3412, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.610937", + "step": 3412, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01623525097966194, + "timestamp": "2025-10-01 03:25:19.613973", + "step": 3413, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.648074", + "step": 3413, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018635882064700127, + "timestamp": "2025-10-01 03:25:19.651369", + "step": 3414, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:19.686082", + "step": 3414, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006775646470487118, + "timestamp": "2025-10-01 03:25:19.689195", + "step": 3415, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.723854", + "step": 3415, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01896272599697113, + "timestamp": "2025-10-01 03:25:19.748493", + "step": 3416, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.780843", + "step": 3416, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007707604207098484, + "timestamp": "2025-10-01 03:25:19.785814", + "step": 3417, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.826009", + "step": 3417, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007559919264167547, + "timestamp": "2025-10-01 03:25:19.832388", + "step": 3418, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.868103", + "step": 3418, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012141570448875427, + "timestamp": "2025-10-01 03:25:19.870992", + "step": 3419, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.904146", + "step": 3419, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002962252590805292, + "timestamp": "2025-10-01 03:25:19.930411", + "step": 3420, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:19.963861", + "step": 3420, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009027594700455666, + "timestamp": "2025-10-01 03:25:19.966714", + "step": 3421, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:20.006026", + "step": 3421, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006412337999790907, + "timestamp": "2025-10-01 03:25:20.009313", + "step": 3422, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.044614", + "step": 3422, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023068655282258987, + "timestamp": "2025-10-01 03:25:20.047124", + "step": 3423, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.078987", + "step": 3423, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020098325330764055, + "timestamp": "2025-10-01 03:25:20.102778", + "step": 3424, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.138295", + "step": 3424, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006497477646917105, + "timestamp": "2025-10-01 03:25:20.144818", + "step": 3425, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.180495", + "step": 3425, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04413042217493057, + "timestamp": "2025-10-01 03:25:20.186062", + "step": 3426, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:20.221414", + "step": 3426, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015821788692846894, + "timestamp": "2025-10-01 03:25:20.223874", + "step": 3427, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:20.269109", + "step": 3427, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020621262956410646, + "timestamp": "2025-10-01 03:25:20.292922", + "step": 3428, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.325627", + "step": 3428, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015564137138426304, + "timestamp": "2025-10-01 03:25:20.328026", + "step": 3429, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.359567", + "step": 3429, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005354769993573427, + "timestamp": "2025-10-01 03:25:20.361989", + "step": 3430, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.393247", + "step": 3430, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026920054107904434, + "timestamp": "2025-10-01 03:25:20.395517", + "step": 3431, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:20.431316", + "step": 3431, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020497809164226055, + "timestamp": "2025-10-01 03:25:20.459729", + "step": 3432, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.491529", + "step": 3432, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031134381890296936, + "timestamp": "2025-10-01 03:25:20.497265", + "step": 3433, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.530974", + "step": 3433, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020492379553616047, + "timestamp": "2025-10-01 03:25:20.533192", + "step": 3434, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.565499", + "step": 3434, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013453485444188118, + "timestamp": "2025-10-01 03:25:20.568474", + "step": 3435, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:20.601280", + "step": 3435, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004028747323900461, + "timestamp": "2025-10-01 03:25:20.626055", + "step": 3436, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.657983", + "step": 3436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0051038311794400215, + "timestamp": "2025-10-01 03:25:20.663868", + "step": 3437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.700382", + "step": 3437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00210418994538486, + "timestamp": "2025-10-01 03:25:20.718198", + "step": 3438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.756758", + "step": 3438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027592144906520844, + "timestamp": "2025-10-01 03:25:20.774699", + "step": 3439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.819737", + "step": 3439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002140672178938985, + "timestamp": "2025-10-01 03:25:20.858990", + "step": 3440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.905666", + "step": 3440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03320576995611191, + "timestamp": "2025-10-01 03:25:20.924738", + "step": 3441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:20.969936", + "step": 3441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006257470231503248, + "timestamp": "2025-10-01 03:25:20.988358", + "step": 3442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.034404", + "step": 3442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002069654408842325, + "timestamp": "2025-10-01 03:25:21.044786", + "step": 3443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.080303", + "step": 3443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014158381149172783, + "timestamp": "2025-10-01 03:25:21.105514", + "step": 3444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.140256", + "step": 3444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03630415350198746, + "timestamp": "2025-10-01 03:25:21.149756", + "step": 3445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.185295", + "step": 3445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013003699481487274, + "timestamp": "2025-10-01 03:25:21.195274", + "step": 3446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.235233", + "step": 3446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024364616721868515, + "timestamp": "2025-10-01 03:25:21.246740", + "step": 3447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.288354", + "step": 3447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001637054025195539, + "timestamp": "2025-10-01 03:25:21.313811", + "step": 3448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.354795", + "step": 3448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009740905836224556, + "timestamp": "2025-10-01 03:25:21.363470", + "step": 3449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:21.401947", + "step": 3449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004558487329632044, + "timestamp": "2025-10-01 03:25:21.408427", + "step": 3450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.448327", + "step": 3450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009898722171783447, + "timestamp": "2025-10-01 03:25:21.451963", + "step": 3451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.491605", + "step": 3451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008330484270118177, + "timestamp": "2025-10-01 03:25:21.521910", + "step": 3452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:21.559712", + "step": 3452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04550279304385185, + "timestamp": "2025-10-01 03:25:21.566012", + "step": 3453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.606593", + "step": 3453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012962801847606897, + "timestamp": "2025-10-01 03:25:21.615310", + "step": 3454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.653726", + "step": 3454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010211523622274399, + "timestamp": "2025-10-01 03:25:21.657615", + "step": 3455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:21.690163", + "step": 3455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013175460044294596, + "timestamp": "2025-10-01 03:25:21.720810", + "step": 3456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.755501", + "step": 3456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009884529747068882, + "timestamp": "2025-10-01 03:25:21.765740", + "step": 3457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.803351", + "step": 3457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016654551029205322, + "timestamp": "2025-10-01 03:25:21.807576", + "step": 3458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.840437", + "step": 3458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020570088177919388, + "timestamp": "2025-10-01 03:25:21.852162", + "step": 3459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.893273", + "step": 3459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016215263167396188, + "timestamp": "2025-10-01 03:25:21.922997", + "step": 3460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:21.962089", + "step": 3460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023950446397066116, + "timestamp": "2025-10-01 03:25:21.974326", + "step": 3461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.014360", + "step": 3461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019527232507243752, + "timestamp": "2025-10-01 03:25:22.022158", + "step": 3462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:22.059442", + "step": 3462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00605428172275424, + "timestamp": "2025-10-01 03:25:22.070050", + "step": 3463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.105470", + "step": 3463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02204175479710102, + "timestamp": "2025-10-01 03:25:22.135466", + "step": 3464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.175174", + "step": 3464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011294083669781685, + "timestamp": "2025-10-01 03:25:22.178912", + "step": 3465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.212022", + "step": 3465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045823471737094223, + "timestamp": "2025-10-01 03:25:22.219164", + "step": 3466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.256248", + "step": 3466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011313079157844186, + "timestamp": "2025-10-01 03:25:22.260265", + "step": 3467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.292986", + "step": 3467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007768020965158939, + "timestamp": "2025-10-01 03:25:22.324155", + "step": 3468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:22.363582", + "step": 3468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009220434003509581, + "timestamp": "2025-10-01 03:25:22.370079", + "step": 3469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.409005", + "step": 3469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006145221414044499, + "timestamp": "2025-10-01 03:25:22.418608", + "step": 3470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.454943", + "step": 3470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028209498152136803, + "timestamp": "2025-10-01 03:25:22.458430", + "step": 3471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.491984", + "step": 3471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009078130824491382, + "timestamp": "2025-10-01 03:25:22.521555", + "step": 3472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.556416", + "step": 3472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028782349545508623, + "timestamp": "2025-10-01 03:25:22.563871", + "step": 3473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:22.598780", + "step": 3473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006859448621980846, + "timestamp": "2025-10-01 03:25:22.606584", + "step": 3474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.645142", + "step": 3474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004552422557026148, + "timestamp": "2025-10-01 03:25:22.648846", + "step": 3475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.684206", + "step": 3475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.047390881925821304, + "timestamp": "2025-10-01 03:25:22.713084", + "step": 3476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.745035", + "step": 3476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007824193686246872, + "timestamp": "2025-10-01 03:25:22.755009", + "step": 3477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:22.792177", + "step": 3477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0255903210490942, + "timestamp": "2025-10-01 03:25:22.801792", + "step": 3478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.839677", + "step": 3478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030381863471120596, + "timestamp": "2025-10-01 03:25:22.848391", + "step": 3479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.889438", + "step": 3479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009444569004699588, + "timestamp": "2025-10-01 03:25:22.918798", + "step": 3480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:22.956209", + "step": 3480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004343718464951962, + "timestamp": "2025-10-01 03:25:22.964400", + "step": 3481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.002047", + "step": 3481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005920399562455714, + "timestamp": "2025-10-01 03:25:23.009748", + "step": 3482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.049341", + "step": 3482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011205413611605763, + "timestamp": "2025-10-01 03:25:23.056491", + "step": 3483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.091272", + "step": 3483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03631141409277916, + "timestamp": "2025-10-01 03:25:23.123416", + "step": 3484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.157535", + "step": 3484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006039091385900974, + "timestamp": "2025-10-01 03:25:23.166076", + "step": 3485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.201866", + "step": 3485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020007421262562275, + "timestamp": "2025-10-01 03:25:23.208523", + "step": 3486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:23.244772", + "step": 3486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02282475307583809, + "timestamp": "2025-10-01 03:25:23.251934", + "step": 3487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.288546", + "step": 3487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000257406645687297, + "timestamp": "2025-10-01 03:25:23.317372", + "step": 3488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.355430", + "step": 3488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02078704535961151, + "timestamp": "2025-10-01 03:25:23.362562", + "step": 3489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:23.400441", + "step": 3489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022419406101107597, + "timestamp": "2025-10-01 03:25:23.411538", + "step": 3490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.451688", + "step": 3490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03495969995856285, + "timestamp": "2025-10-01 03:25:23.456380", + "step": 3491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.488777", + "step": 3491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007138541550375521, + "timestamp": "2025-10-01 03:25:23.519609", + "step": 3492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.557786", + "step": 3492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005499590188264847, + "timestamp": "2025-10-01 03:25:23.568283", + "step": 3493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.607323", + "step": 3493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04227996990084648, + "timestamp": "2025-10-01 03:25:23.615739", + "step": 3494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.654013", + "step": 3494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001526520703919232, + "timestamp": "2025-10-01 03:25:23.657642", + "step": 3495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:23.695422", + "step": 3495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005888790474273264, + "timestamp": "2025-10-01 03:25:23.726689", + "step": 3496, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:25:25.922233", + "step": 3496, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2532331.447555987, + "timestamp": "2025-10-01 03:25:25.929156", + "step": 3496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:25.966294", + "step": 3496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011667234357446432, + "timestamp": "2025-10-01 03:25:25.975606", + "step": 3497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:26.015280", + "step": 3497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027379717212170362, + "timestamp": "2025-10-01 03:25:26.026272", + "step": 3498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:26.063124", + "step": 3498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.039456747472286224, + "timestamp": "2025-10-01 03:25:26.067676", + "step": 3499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:26.101962", + "step": 3499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005751636810600758, + "timestamp": "2025-10-01 03:25:26.131821", + "step": 3500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 3500", + "timestamp": "2025-10-01 03:25:31.092037", + "step": 3500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.127125", + "step": 3500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002122236881405115, + "timestamp": "2025-10-01 03:25:31.131331", + "step": 3501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.168145", + "step": 3501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026296719443053007, + "timestamp": "2025-10-01 03:25:31.174388", + "step": 3502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.210065", + "step": 3502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016362236347049475, + "timestamp": "2025-10-01 03:25:31.217481", + "step": 3503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.250179", + "step": 3503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00051133829401806, + "timestamp": "2025-10-01 03:25:31.279284", + "step": 3504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.314892", + "step": 3504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002709451364353299, + "timestamp": "2025-10-01 03:25:31.318448", + "step": 3505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.354543", + "step": 3505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014071858488023281, + "timestamp": "2025-10-01 03:25:31.358076", + "step": 3506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:31.391993", + "step": 3506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003424133639782667, + "timestamp": "2025-10-01 03:25:31.399815", + "step": 3507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.432537", + "step": 3507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000858012237586081, + "timestamp": "2025-10-01 03:25:31.461653", + "step": 3508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.498060", + "step": 3508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005231899558566511, + "timestamp": "2025-10-01 03:25:31.502408", + "step": 3509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.540080", + "step": 3509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025830951053649187, + "timestamp": "2025-10-01 03:25:31.543997", + "step": 3510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.582706", + "step": 3510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009741801186464727, + "timestamp": "2025-10-01 03:25:31.588779", + "step": 3511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.626817", + "step": 3511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07706137746572495, + "timestamp": "2025-10-01 03:25:31.650562", + "step": 3512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.683860", + "step": 3512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010299156419932842, + "timestamp": "2025-10-01 03:25:31.687357", + "step": 3513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.719803", + "step": 3513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022459339816123247, + "timestamp": "2025-10-01 03:25:31.728234", + "step": 3514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:31.766899", + "step": 3514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001680448418483138, + "timestamp": "2025-10-01 03:25:31.772438", + "step": 3515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.809127", + "step": 3515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009483793750405312, + "timestamp": "2025-10-01 03:25:31.835260", + "step": 3516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.873970", + "step": 3516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005893072811886668, + "timestamp": "2025-10-01 03:25:31.877347", + "step": 3517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.911608", + "step": 3517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033416678197681904, + "timestamp": "2025-10-01 03:25:31.916881", + "step": 3518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.950446", + "step": 3518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00026284580235369503, + "timestamp": "2025-10-01 03:25:31.956587", + "step": 3519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:31.989067", + "step": 3519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012600880581885576, + "timestamp": "2025-10-01 03:25:32.019165", + "step": 3520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.051836", + "step": 3520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017487138509750366, + "timestamp": "2025-10-01 03:25:32.060083", + "step": 3521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:32.096858", + "step": 3521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007054847083054483, + "timestamp": "2025-10-01 03:25:32.105372", + "step": 3522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.141038", + "step": 3522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044121561222709715, + "timestamp": "2025-10-01 03:25:32.145022", + "step": 3523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.189614", + "step": 3523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02244669385254383, + "timestamp": "2025-10-01 03:25:32.214810", + "step": 3524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.247349", + "step": 3524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00023180388961918652, + "timestamp": "2025-10-01 03:25:32.256706", + "step": 3525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.296818", + "step": 3525, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004151902161538601, + "timestamp": "2025-10-01 03:25:32.306096", + "step": 3526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:32.347789", + "step": 3526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008327068062499166, + "timestamp": "2025-10-01 03:25:32.356002", + "step": 3527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.393331", + "step": 3527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002984424354508519, + "timestamp": "2025-10-01 03:25:32.423425", + "step": 3528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.464272", + "step": 3528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014226136729121208, + "timestamp": "2025-10-01 03:25:32.481926", + "step": 3529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.528477", + "step": 3529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013611579313874245, + "timestamp": "2025-10-01 03:25:32.548755", + "step": 3530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.596442", + "step": 3530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.056081730872392654, + "timestamp": "2025-10-01 03:25:32.617011", + "step": 3531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.666725", + "step": 3531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015950821340084076, + "timestamp": "2025-10-01 03:25:32.708622", + "step": 3532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.750548", + "step": 3532, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004944683518260717, + "timestamp": "2025-10-01 03:25:32.759380", + "step": 3533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:32.801667", + "step": 3533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002060875529423356, + "timestamp": "2025-10-01 03:25:32.811156", + "step": 3534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.844202", + "step": 3534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015152735635638237, + "timestamp": "2025-10-01 03:25:32.850941", + "step": 3535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.887061", + "step": 3535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002878794213756919, + "timestamp": "2025-10-01 03:25:32.913578", + "step": 3536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:32.950668", + "step": 3536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04362203925848007, + "timestamp": "2025-10-01 03:25:32.961507", + "step": 3537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.000161", + "step": 3537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010338505962863564, + "timestamp": "2025-10-01 03:25:33.010091", + "step": 3538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.047531", + "step": 3538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005570573266595602, + "timestamp": "2025-10-01 03:25:33.052457", + "step": 3539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:33.085393", + "step": 3539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002781972289085388, + "timestamp": "2025-10-01 03:25:33.109864", + "step": 3540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:33.141187", + "step": 3540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021046402398496866, + "timestamp": "2025-10-01 03:25:33.144896", + "step": 3541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:33.178364", + "step": 3541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03163104131817818, + "timestamp": "2025-10-01 03:25:33.181542", + "step": 3542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.212718", + "step": 3542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002444853773340583, + "timestamp": "2025-10-01 03:25:33.215762", + "step": 3543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:33.249393", + "step": 3543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012538658920675516, + "timestamp": "2025-10-01 03:25:33.273824", + "step": 3544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.305874", + "step": 3544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024823197163641453, + "timestamp": "2025-10-01 03:25:33.311431", + "step": 3545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.347802", + "step": 3545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.049153972417116165, + "timestamp": "2025-10-01 03:25:33.356732", + "step": 3546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.395691", + "step": 3546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002075204683933407, + "timestamp": "2025-10-01 03:25:33.401063", + "step": 3547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.432336", + "step": 3547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026550818234682083, + "timestamp": "2025-10-01 03:25:33.457383", + "step": 3548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.495010", + "step": 3548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003019218274857849, + "timestamp": "2025-10-01 03:25:33.500762", + "step": 3549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.535289", + "step": 3549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01825217343866825, + "timestamp": "2025-10-01 03:25:33.540151", + "step": 3550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.573636", + "step": 3550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015917258337140083, + "timestamp": "2025-10-01 03:25:33.577546", + "step": 3551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.612945", + "step": 3551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038949339650571346, + "timestamp": "2025-10-01 03:25:33.644780", + "step": 3552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.688236", + "step": 3552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003564103040844202, + "timestamp": "2025-10-01 03:25:33.700619", + "step": 3553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:33.740348", + "step": 3553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01937287673354149, + "timestamp": "2025-10-01 03:25:33.753326", + "step": 3554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:33.796623", + "step": 3554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014508334919810295, + "timestamp": "2025-10-01 03:25:33.816605", + "step": 3555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:33.865616", + "step": 3555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009006249718368053, + "timestamp": "2025-10-01 03:25:33.906687", + "step": 3556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:33.953223", + "step": 3556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014529538340866566, + "timestamp": "2025-10-01 03:25:33.972061", + "step": 3557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.019305", + "step": 3557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006778027745895088, + "timestamp": "2025-10-01 03:25:34.038689", + "step": 3558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.086198", + "step": 3558, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002112393034622073, + "timestamp": "2025-10-01 03:25:34.091468", + "step": 3559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.129334", + "step": 3559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004309036768972874, + "timestamp": "2025-10-01 03:25:34.169742", + "step": 3560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:34.215865", + "step": 3560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009699096903204918, + "timestamp": "2025-10-01 03:25:34.234942", + "step": 3561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.282430", + "step": 3561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012086115777492523, + "timestamp": "2025-10-01 03:25:34.302048", + "step": 3562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.347726", + "step": 3562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003106823714915663, + "timestamp": "2025-10-01 03:25:34.371972", + "step": 3563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.418504", + "step": 3563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035137489438056946, + "timestamp": "2025-10-01 03:25:34.445042", + "step": 3564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.494561", + "step": 3564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013792266137897968, + "timestamp": "2025-10-01 03:25:34.508614", + "step": 3565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:34.548199", + "step": 3565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028142372611910105, + "timestamp": "2025-10-01 03:25:34.559861", + "step": 3566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.596503", + "step": 3566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02071036398410797, + "timestamp": "2025-10-01 03:25:34.601823", + "step": 3567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.640137", + "step": 3567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004633808974176645, + "timestamp": "2025-10-01 03:25:34.666385", + "step": 3568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.709843", + "step": 3568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016435615718364716, + "timestamp": "2025-10-01 03:25:34.722133", + "step": 3569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.762390", + "step": 3569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001717503764666617, + "timestamp": "2025-10-01 03:25:34.774561", + "step": 3570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.815481", + "step": 3570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030378021765500307, + "timestamp": "2025-10-01 03:25:34.828130", + "step": 3571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.866993", + "step": 3571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003965519834309816, + "timestamp": "2025-10-01 03:25:34.898446", + "step": 3572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:34.931930", + "step": 3572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00032112604822032154, + "timestamp": "2025-10-01 03:25:34.936812", + "step": 3573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:34.969331", + "step": 3573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026656214147806168, + "timestamp": "2025-10-01 03:25:34.973438", + "step": 3574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.015615", + "step": 3574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012014331296086311, + "timestamp": "2025-10-01 03:25:35.020895", + "step": 3575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.062545", + "step": 3575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009944384917616844, + "timestamp": "2025-10-01 03:25:35.088064", + "step": 3576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.132166", + "step": 3576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00040751529741100967, + "timestamp": "2025-10-01 03:25:35.136118", + "step": 3577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.168361", + "step": 3577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014173452742397785, + "timestamp": "2025-10-01 03:25:35.184234", + "step": 3578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.226121", + "step": 3578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018011757638305426, + "timestamp": "2025-10-01 03:25:35.238977", + "step": 3579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.281340", + "step": 3579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009908685460686684, + "timestamp": "2025-10-01 03:25:35.317611", + "step": 3580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.351232", + "step": 3580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002270175376906991, + "timestamp": "2025-10-01 03:25:35.365403", + "step": 3581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.401571", + "step": 3581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013878131285309792, + "timestamp": "2025-10-01 03:25:35.414955", + "step": 3582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.465060", + "step": 3582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02419423684477806, + "timestamp": "2025-10-01 03:25:35.469531", + "step": 3583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.512016", + "step": 3583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0061748879961669445, + "timestamp": "2025-10-01 03:25:35.538413", + "step": 3584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.571294", + "step": 3584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014424575492739677, + "timestamp": "2025-10-01 03:25:35.576153", + "step": 3585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.610212", + "step": 3585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010142347309738398, + "timestamp": "2025-10-01 03:25:35.621916", + "step": 3586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.664797", + "step": 3586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027360226958990097, + "timestamp": "2025-10-01 03:25:35.669751", + "step": 3587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.704311", + "step": 3587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011219099396839738, + "timestamp": "2025-10-01 03:25:35.737264", + "step": 3588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.779220", + "step": 3588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020503129810094833, + "timestamp": "2025-10-01 03:25:35.791845", + "step": 3589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.833547", + "step": 3589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021163439378142357, + "timestamp": "2025-10-01 03:25:35.843330", + "step": 3590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.886052", + "step": 3590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003574747359380126, + "timestamp": "2025-10-01 03:25:35.896411", + "step": 3591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:35.948375", + "step": 3591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018874300876632333, + "timestamp": "2025-10-01 03:25:35.980969", + "step": 3592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.021736", + "step": 3592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030020426958799362, + "timestamp": "2025-10-01 03:25:36.025326", + "step": 3593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.060335", + "step": 3593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035288650542497635, + "timestamp": "2025-10-01 03:25:36.072375", + "step": 3594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:36.119105", + "step": 3594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012421899009495974, + "timestamp": "2025-10-01 03:25:36.122677", + "step": 3595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:36.155383", + "step": 3595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011734797153621912, + "timestamp": "2025-10-01 03:25:36.189326", + "step": 3596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.221376", + "step": 3596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06555880606174469, + "timestamp": "2025-10-01 03:25:36.233662", + "step": 3597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.274843", + "step": 3597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004546067211776972, + "timestamp": "2025-10-01 03:25:36.284590", + "step": 3598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.324832", + "step": 3598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002050773473456502, + "timestamp": "2025-10-01 03:25:36.332811", + "step": 3599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.375599", + "step": 3599, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013062627986073494, + "timestamp": "2025-10-01 03:25:36.406905", + "step": 3600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:36.447621", + "step": 3600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001462722779251635, + "timestamp": "2025-10-01 03:25:36.461929", + "step": 3601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.504304", + "step": 3601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0067664459347724915, + "timestamp": "2025-10-01 03:25:36.515753", + "step": 3602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:36.556136", + "step": 3602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038016636390239, + "timestamp": "2025-10-01 03:25:36.561122", + "step": 3603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.597611", + "step": 3603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009677761234343052, + "timestamp": "2025-10-01 03:25:36.623714", + "step": 3604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:36.659377", + "step": 3604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0074912686832249165, + "timestamp": "2025-10-01 03:25:36.665557", + "step": 3605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.700479", + "step": 3605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001955330662894994, + "timestamp": "2025-10-01 03:25:36.704466", + "step": 3606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.739508", + "step": 3606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001762888627126813, + "timestamp": "2025-10-01 03:25:36.745074", + "step": 3607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.779985", + "step": 3607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03225670009851456, + "timestamp": "2025-10-01 03:25:36.810748", + "step": 3608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.846387", + "step": 3608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006490348605439067, + "timestamp": "2025-10-01 03:25:36.856261", + "step": 3609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.889872", + "step": 3609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001340194488875568, + "timestamp": "2025-10-01 03:25:36.896816", + "step": 3610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.931730", + "step": 3610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007708277553319931, + "timestamp": "2025-10-01 03:25:36.935071", + "step": 3611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:36.971999", + "step": 3611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005716033629141748, + "timestamp": "2025-10-01 03:25:37.000516", + "step": 3612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.036273", + "step": 3612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005379324313253164, + "timestamp": "2025-10-01 03:25:37.040399", + "step": 3613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:37.084396", + "step": 3613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007308292551897466, + "timestamp": "2025-10-01 03:25:37.095097", + "step": 3614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:25:37.139849", + "step": 3614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04726659506559372, + "timestamp": "2025-10-01 03:25:37.151380", + "step": 3615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.192587", + "step": 3615, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024644924327731133, + "timestamp": "2025-10-01 03:25:37.227411", + "step": 3616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.266498", + "step": 3616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004774771223310381, + "timestamp": "2025-10-01 03:25:37.273748", + "step": 3617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.312921", + "step": 3617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010318489745259285, + "timestamp": "2025-10-01 03:25:37.321899", + "step": 3618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.359767", + "step": 3618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029414601158350706, + "timestamp": "2025-10-01 03:25:37.365871", + "step": 3619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.410664", + "step": 3619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.048916254192590714, + "timestamp": "2025-10-01 03:25:37.440295", + "step": 3620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.475079", + "step": 3620, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05967896059155464, + "timestamp": "2025-10-01 03:25:37.480303", + "step": 3621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.515677", + "step": 3621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005718478001654148, + "timestamp": "2025-10-01 03:25:37.522938", + "step": 3622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.559151", + "step": 3622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017414145171642303, + "timestamp": "2025-10-01 03:25:37.564861", + "step": 3623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.603088", + "step": 3623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006165620870888233, + "timestamp": "2025-10-01 03:25:37.632047", + "step": 3624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.669049", + "step": 3624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013398696901276708, + "timestamp": "2025-10-01 03:25:37.673793", + "step": 3625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.710549", + "step": 3625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00042306631803512573, + "timestamp": "2025-10-01 03:25:37.718642", + "step": 3626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.757363", + "step": 3626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.058006249368190765, + "timestamp": "2025-10-01 03:25:37.760473", + "step": 3627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.792431", + "step": 3627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007660353439860046, + "timestamp": "2025-10-01 03:25:37.817727", + "step": 3628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.852371", + "step": 3628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012416710145771503, + "timestamp": "2025-10-01 03:25:37.855024", + "step": 3629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:37.888531", + "step": 3629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012505775317549706, + "timestamp": "2025-10-01 03:25:37.891016", + "step": 3630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:37.922285", + "step": 3630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01787690818309784, + "timestamp": "2025-10-01 03:25:37.925082", + "step": 3631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:37.956945", + "step": 3631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04762158542871475, + "timestamp": "2025-10-01 03:25:37.982045", + "step": 3632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.020469", + "step": 3632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011460947804152966, + "timestamp": "2025-10-01 03:25:38.022560", + "step": 3633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:38.054697", + "step": 3633, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006007209303788841, + "timestamp": "2025-10-01 03:25:38.059189", + "step": 3634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.093573", + "step": 3634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000826676725409925, + "timestamp": "2025-10-01 03:25:38.099194", + "step": 3635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.133331", + "step": 3635, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005052602384239435, + "timestamp": "2025-10-01 03:25:38.159896", + "step": 3636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.192791", + "step": 3636, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010320753790438175, + "timestamp": "2025-10-01 03:25:38.196271", + "step": 3637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.229365", + "step": 3637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008463467471301556, + "timestamp": "2025-10-01 03:25:38.231534", + "step": 3638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.263677", + "step": 3638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003800557227805257, + "timestamp": "2025-10-01 03:25:38.265905", + "step": 3639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.299146", + "step": 3639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008315506274811924, + "timestamp": "2025-10-01 03:25:38.323054", + "step": 3640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.357763", + "step": 3640, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01909547671675682, + "timestamp": "2025-10-01 03:25:38.360214", + "step": 3641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.393263", + "step": 3641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012589922174811363, + "timestamp": "2025-10-01 03:25:38.395531", + "step": 3642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:38.429275", + "step": 3642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017418264178559184, + "timestamp": "2025-10-01 03:25:38.431596", + "step": 3643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.463216", + "step": 3643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032596171367913485, + "timestamp": "2025-10-01 03:25:38.486797", + "step": 3644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.517852", + "step": 3644, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002009347081184387, + "timestamp": "2025-10-01 03:25:38.519954", + "step": 3645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.551776", + "step": 3645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01852526143193245, + "timestamp": "2025-10-01 03:25:38.554526", + "step": 3646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.590580", + "step": 3646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013805091381072998, + "timestamp": "2025-10-01 03:25:38.607815", + "step": 3647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:38.654412", + "step": 3647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023946010041981936, + "timestamp": "2025-10-01 03:25:38.681244", + "step": 3648, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:25:40.998712", + "step": 3648, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2220295.310325338, + "timestamp": "2025-10-01 03:25:41.012456", + "step": 3648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.053659", + "step": 3648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03135523572564125, + "timestamp": "2025-10-01 03:25:41.063636", + "step": 3649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.097236", + "step": 3649, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032778920140117407, + "timestamp": "2025-10-01 03:25:41.108631", + "step": 3650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.151611", + "step": 3650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004716848488897085, + "timestamp": "2025-10-01 03:25:41.160822", + "step": 3651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.202196", + "step": 3651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003457795362919569, + "timestamp": "2025-10-01 03:25:41.236446", + "step": 3652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.283940", + "step": 3652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012129256501793861, + "timestamp": "2025-10-01 03:25:41.298624", + "step": 3653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.332448", + "step": 3653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03477315977215767, + "timestamp": "2025-10-01 03:25:41.345622", + "step": 3654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:41.385417", + "step": 3654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005663577932864428, + "timestamp": "2025-10-01 03:25:41.396771", + "step": 3655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.434666", + "step": 3655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024142978712916374, + "timestamp": "2025-10-01 03:25:41.470062", + "step": 3656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.512935", + "step": 3656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004866109229624271, + "timestamp": "2025-10-01 03:25:41.524547", + "step": 3657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.572978", + "step": 3657, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0042257653549313545, + "timestamp": "2025-10-01 03:25:41.585481", + "step": 3658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.619610", + "step": 3658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003436512779444456, + "timestamp": "2025-10-01 03:25:41.629512", + "step": 3659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:41.670770", + "step": 3659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010270093334838748, + "timestamp": "2025-10-01 03:25:41.704269", + "step": 3660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.736799", + "step": 3660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006943754386156797, + "timestamp": "2025-10-01 03:25:41.745510", + "step": 3661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.784056", + "step": 3661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005565951112657785, + "timestamp": "2025-10-01 03:25:41.787819", + "step": 3662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.819719", + "step": 3662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02569407783448696, + "timestamp": "2025-10-01 03:25:41.823973", + "step": 3663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.855332", + "step": 3663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003985781688243151, + "timestamp": "2025-10-01 03:25:41.881559", + "step": 3664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.916377", + "step": 3664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014589845202863216, + "timestamp": "2025-10-01 03:25:41.920900", + "step": 3665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.958277", + "step": 3665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001824434963054955, + "timestamp": "2025-10-01 03:25:41.962263", + "step": 3666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:41.994951", + "step": 3666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02272934280335903, + "timestamp": "2025-10-01 03:25:42.008296", + "step": 3667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.050614", + "step": 3667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021943295374512672, + "timestamp": "2025-10-01 03:25:42.090100", + "step": 3668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:42.139959", + "step": 3668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041517965495586395, + "timestamp": "2025-10-01 03:25:42.156917", + "step": 3669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.206550", + "step": 3669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005363538512028754, + "timestamp": "2025-10-01 03:25:42.224893", + "step": 3670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.266883", + "step": 3670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010053033474832773, + "timestamp": "2025-10-01 03:25:42.281206", + "step": 3671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.324047", + "step": 3671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006711864261887968, + "timestamp": "2025-10-01 03:25:42.362163", + "step": 3672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:42.411155", + "step": 3672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04918567091226578, + "timestamp": "2025-10-01 03:25:42.426890", + "step": 3673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.458026", + "step": 3673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013399721123278141, + "timestamp": "2025-10-01 03:25:42.461465", + "step": 3674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:42.495081", + "step": 3674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009293023613281548, + "timestamp": "2025-10-01 03:25:42.498507", + "step": 3675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.530597", + "step": 3675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018534990027546883, + "timestamp": "2025-10-01 03:25:42.556378", + "step": 3676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.588311", + "step": 3676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007140895817428827, + "timestamp": "2025-10-01 03:25:42.592425", + "step": 3677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.624513", + "step": 3677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04438280314207077, + "timestamp": "2025-10-01 03:25:42.630139", + "step": 3678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.663291", + "step": 3678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027776677161455154, + "timestamp": "2025-10-01 03:25:42.667809", + "step": 3679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.700869", + "step": 3679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004521213471889496, + "timestamp": "2025-10-01 03:25:42.727151", + "step": 3680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.762629", + "step": 3680, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0119559271261096, + "timestamp": "2025-10-01 03:25:42.774018", + "step": 3681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:42.819854", + "step": 3681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03066115267574787, + "timestamp": "2025-10-01 03:25:42.833784", + "step": 3682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:25:42.878417", + "step": 3682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011823282577097416, + "timestamp": "2025-10-01 03:25:42.890970", + "step": 3683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:42.933788", + "step": 3683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005724582355469465, + "timestamp": "2025-10-01 03:25:42.969522", + "step": 3684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:43.016429", + "step": 3684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010556580498814583, + "timestamp": "2025-10-01 03:25:43.027413", + "step": 3685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.071125", + "step": 3685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01169345062226057, + "timestamp": "2025-10-01 03:25:43.082777", + "step": 3686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.123151", + "step": 3686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01106344349682331, + "timestamp": "2025-10-01 03:25:43.126515", + "step": 3687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.157003", + "step": 3687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.033307116478681564, + "timestamp": "2025-10-01 03:25:43.189986", + "step": 3688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.227866", + "step": 3688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005124320276081562, + "timestamp": "2025-10-01 03:25:43.233633", + "step": 3689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.274925", + "step": 3689, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00952932145446539, + "timestamp": "2025-10-01 03:25:43.283129", + "step": 3690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.323192", + "step": 3690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002189344260841608, + "timestamp": "2025-10-01 03:25:43.329725", + "step": 3691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.364088", + "step": 3691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02037063241004944, + "timestamp": "2025-10-01 03:25:43.395572", + "step": 3692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.427991", + "step": 3692, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0458696149289608, + "timestamp": "2025-10-01 03:25:43.440376", + "step": 3693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.481960", + "step": 3693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004629707429558039, + "timestamp": "2025-10-01 03:25:43.492724", + "step": 3694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.533770", + "step": 3694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034251597244292498, + "timestamp": "2025-10-01 03:25:43.545540", + "step": 3695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.586471", + "step": 3695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018507344648241997, + "timestamp": "2025-10-01 03:25:43.619552", + "step": 3696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.659641", + "step": 3696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008135197684168816, + "timestamp": "2025-10-01 03:25:43.671631", + "step": 3697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:43.712189", + "step": 3697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007164764101617038, + "timestamp": "2025-10-01 03:25:43.722662", + "step": 3698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.759299", + "step": 3698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004314429592341185, + "timestamp": "2025-10-01 03:25:43.762537", + "step": 3699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.802506", + "step": 3699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031223373487591743, + "timestamp": "2025-10-01 03:25:43.836513", + "step": 3700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.871982", + "step": 3700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0051519861444830894, + "timestamp": "2025-10-01 03:25:43.880156", + "step": 3701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:43.920454", + "step": 3701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05817556753754616, + "timestamp": "2025-10-01 03:25:43.924710", + "step": 3702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:43.956251", + "step": 3702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011040669865906239, + "timestamp": "2025-10-01 03:25:43.966627", + "step": 3703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.017515", + "step": 3703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003370035672560334, + "timestamp": "2025-10-01 03:25:44.043477", + "step": 3704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.081708", + "step": 3704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002861476270481944, + "timestamp": "2025-10-01 03:25:44.095847", + "step": 3705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.137123", + "step": 3705, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012040823698043823, + "timestamp": "2025-10-01 03:25:44.141638", + "step": 3706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.181557", + "step": 3706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015863711014389992, + "timestamp": "2025-10-01 03:25:44.193782", + "step": 3707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.233135", + "step": 3707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024866857565939426, + "timestamp": "2025-10-01 03:25:44.268376", + "step": 3708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.309345", + "step": 3708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006397690623998642, + "timestamp": "2025-10-01 03:25:44.311543", + "step": 3709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.342930", + "step": 3709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009702669340185821, + "timestamp": "2025-10-01 03:25:44.351285", + "step": 3710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:44.394871", + "step": 3710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002475014654919505, + "timestamp": "2025-10-01 03:25:44.398316", + "step": 3711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.430348", + "step": 3711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009405911550857127, + "timestamp": "2025-10-01 03:25:44.457388", + "step": 3712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.491336", + "step": 3712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024636450689285994, + "timestamp": "2025-10-01 03:25:44.499445", + "step": 3713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.535555", + "step": 3713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016282742843031883, + "timestamp": "2025-10-01 03:25:44.539198", + "step": 3714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.570083", + "step": 3714, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006447056657634676, + "timestamp": "2025-10-01 03:25:44.581008", + "step": 3715, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.613610", + "step": 3715, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00039655264117754996, + "timestamp": "2025-10-01 03:25:44.641396", + "step": 3716, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:25:44.674985", + "step": 3716, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006354427896440029, + "timestamp": "2025-10-01 03:25:44.683391", + "step": 3717, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.720835", + "step": 3717, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002363142091780901, + "timestamp": "2025-10-01 03:25:44.723854", + "step": 3718, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:44.757092", + "step": 3718, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03613544628024101, + "timestamp": "2025-10-01 03:25:44.765960", + "step": 3719, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:44.803823", + "step": 3719, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003200301667675376, + "timestamp": "2025-10-01 03:25:44.836256", + "step": 3720, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.873111", + "step": 3720, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00040579927735961974, + "timestamp": "2025-10-01 03:25:44.881951", + "step": 3721, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.918204", + "step": 3721, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03788243979215622, + "timestamp": "2025-10-01 03:25:44.926889", + "step": 3722, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:44.966680", + "step": 3722, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005566075909882784, + "timestamp": "2025-10-01 03:25:44.978393", + "step": 3723, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.013400", + "step": 3723, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008345883688889444, + "timestamp": "2025-10-01 03:25:45.041729", + "step": 3724, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.077456", + "step": 3724, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0264884065836668, + "timestamp": "2025-10-01 03:25:45.087767", + "step": 3725, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:45.128906", + "step": 3725, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02320765145123005, + "timestamp": "2025-10-01 03:25:45.141214", + "step": 3726, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:45.181609", + "step": 3726, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03159389644861221, + "timestamp": "2025-10-01 03:25:45.185187", + "step": 3727, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.217895", + "step": 3727, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04115120321512222, + "timestamp": "2025-10-01 03:25:45.252831", + "step": 3728, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.301958", + "step": 3728, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008094403310678899, + "timestamp": "2025-10-01 03:25:45.311445", + "step": 3729, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:45.356399", + "step": 3729, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05157012492418289, + "timestamp": "2025-10-01 03:25:45.365800", + "step": 3730, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.406079", + "step": 3730, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006714377552270889, + "timestamp": "2025-10-01 03:25:45.418476", + "step": 3731, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:45.455987", + "step": 3731, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010803203331306577, + "timestamp": "2025-10-01 03:25:45.486133", + "step": 3732, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.523123", + "step": 3732, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002906732726842165, + "timestamp": "2025-10-01 03:25:45.527780", + "step": 3733, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:45.564857", + "step": 3733, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012306955177336931, + "timestamp": "2025-10-01 03:25:45.567164", + "step": 3734, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.599524", + "step": 3734, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009444309980608523, + "timestamp": "2025-10-01 03:25:45.605770", + "step": 3735, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:45.642827", + "step": 3735, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024306210689246655, + "timestamp": "2025-10-01 03:25:45.670393", + "step": 3736, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.704899", + "step": 3736, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007744734641164541, + "timestamp": "2025-10-01 03:25:45.710472", + "step": 3737, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.742138", + "step": 3737, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001411815406754613, + "timestamp": "2025-10-01 03:25:45.747741", + "step": 3738, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.784713", + "step": 3738, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010095217730849981, + "timestamp": "2025-10-01 03:25:45.789844", + "step": 3739, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:45.825918", + "step": 3739, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009947827784344554, + "timestamp": "2025-10-01 03:25:45.850715", + "step": 3740, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.886182", + "step": 3740, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017636605771258473, + "timestamp": "2025-10-01 03:25:45.892579", + "step": 3741, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.927523", + "step": 3741, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006213573506101966, + "timestamp": "2025-10-01 03:25:45.930919", + "step": 3742, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:45.966252", + "step": 3742, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008985876338556409, + "timestamp": "2025-10-01 03:25:45.976902", + "step": 3743, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:46.016374", + "step": 3743, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022497272584587336, + "timestamp": "2025-10-01 03:25:46.045268", + "step": 3744, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:46.081064", + "step": 3744, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029928233474493027, + "timestamp": "2025-10-01 03:25:46.086353", + "step": 3745, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.127069", + "step": 3745, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07028307020664215, + "timestamp": "2025-10-01 03:25:46.135753", + "step": 3746, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.172182", + "step": 3746, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005358458496630192, + "timestamp": "2025-10-01 03:25:46.183050", + "step": 3747, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:46.225045", + "step": 3747, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003016003640368581, + "timestamp": "2025-10-01 03:25:46.255715", + "step": 3748, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:46.295580", + "step": 3748, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005588397034443915, + "timestamp": "2025-10-01 03:25:46.304997", + "step": 3749, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.348664", + "step": 3749, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00047901985817588866, + "timestamp": "2025-10-01 03:25:46.351988", + "step": 3750, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.387841", + "step": 3750, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00122690643183887, + "timestamp": "2025-10-01 03:25:46.390273", + "step": 3751, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:46.424791", + "step": 3751, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006884299218654633, + "timestamp": "2025-10-01 03:25:46.451452", + "step": 3752, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.495552", + "step": 3752, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003296991344541311, + "timestamp": "2025-10-01 03:25:46.503655", + "step": 3753, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.538087", + "step": 3753, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007244452135637403, + "timestamp": "2025-10-01 03:25:46.546049", + "step": 3754, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:46.582814", + "step": 3754, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004470879503060132, + "timestamp": "2025-10-01 03:25:46.589102", + "step": 3755, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.626965", + "step": 3755, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037468742579221725, + "timestamp": "2025-10-01 03:25:46.653494", + "step": 3756, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.686136", + "step": 3756, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026595164090394974, + "timestamp": "2025-10-01 03:25:46.693591", + "step": 3757, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.736503", + "step": 3757, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014938062522560358, + "timestamp": "2025-10-01 03:25:46.741491", + "step": 3758, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:46.778123", + "step": 3758, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0340430848300457, + "timestamp": "2025-10-01 03:25:46.783259", + "step": 3759, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.817611", + "step": 3759, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000650464091449976, + "timestamp": "2025-10-01 03:25:46.845213", + "step": 3760, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.880063", + "step": 3760, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022249771282076836, + "timestamp": "2025-10-01 03:25:46.886630", + "step": 3761, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.921051", + "step": 3761, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011189329670742154, + "timestamp": "2025-10-01 03:25:46.938322", + "step": 3762, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:46.983370", + "step": 3762, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006580372340977192, + "timestamp": "2025-10-01 03:25:46.989316", + "step": 3763, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:47.024091", + "step": 3763, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005848858854733407, + "timestamp": "2025-10-01 03:25:47.052539", + "step": 3764, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.092434", + "step": 3764, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022373899817466736, + "timestamp": "2025-10-01 03:25:47.110485", + "step": 3765, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.162162", + "step": 3765, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034399149008095264, + "timestamp": "2025-10-01 03:25:47.167539", + "step": 3766, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.218786", + "step": 3766, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029058249667286873, + "timestamp": "2025-10-01 03:25:47.239008", + "step": 3767, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.286609", + "step": 3767, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00784750934690237, + "timestamp": "2025-10-01 03:25:47.329393", + "step": 3768, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.365862", + "step": 3768, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004159036558121443, + "timestamp": "2025-10-01 03:25:47.386374", + "step": 3769, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:47.433816", + "step": 3769, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01476783026009798, + "timestamp": "2025-10-01 03:25:47.439351", + "step": 3770, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.471681", + "step": 3770, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01913689635694027, + "timestamp": "2025-10-01 03:25:47.488864", + "step": 3771, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.534387", + "step": 3771, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03660781309008598, + "timestamp": "2025-10-01 03:25:47.571452", + "step": 3772, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.614354", + "step": 3772, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003064374905079603, + "timestamp": "2025-10-01 03:25:47.619568", + "step": 3773, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:47.662334", + "step": 3773, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041894298046827316, + "timestamp": "2025-10-01 03:25:47.670951", + "step": 3774, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.718722", + "step": 3774, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040812116116285324, + "timestamp": "2025-10-01 03:25:47.723461", + "step": 3775, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:47.755451", + "step": 3775, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03692221641540527, + "timestamp": "2025-10-01 03:25:47.779635", + "step": 3776, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.823234", + "step": 3776, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04247002676129341, + "timestamp": "2025-10-01 03:25:47.825749", + "step": 3777, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.856990", + "step": 3777, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006851964630186558, + "timestamp": "2025-10-01 03:25:47.859517", + "step": 3778, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.892451", + "step": 3778, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014917566440999508, + "timestamp": "2025-10-01 03:25:47.894726", + "step": 3779, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:47.926791", + "step": 3779, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.038920629769563675, + "timestamp": "2025-10-01 03:25:47.951506", + "step": 3780, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:47.987856", + "step": 3780, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005150733981281519, + "timestamp": "2025-10-01 03:25:47.991075", + "step": 3781, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.023920", + "step": 3781, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05691367760300636, + "timestamp": "2025-10-01 03:25:48.027815", + "step": 3782, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.061504", + "step": 3782, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05343586578965187, + "timestamp": "2025-10-01 03:25:48.064280", + "step": 3783, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.104778", + "step": 3783, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010497691109776497, + "timestamp": "2025-10-01 03:25:48.129623", + "step": 3784, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.162312", + "step": 3784, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027480630204081535, + "timestamp": "2025-10-01 03:25:48.173627", + "step": 3785, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:48.213877", + "step": 3785, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0058451504446566105, + "timestamp": "2025-10-01 03:25:48.226391", + "step": 3786, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.267732", + "step": 3786, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028534960001707077, + "timestamp": "2025-10-01 03:25:48.280425", + "step": 3787, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.320371", + "step": 3787, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00403703935444355, + "timestamp": "2025-10-01 03:25:48.354552", + "step": 3788, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:48.394258", + "step": 3788, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0064873890951275826, + "timestamp": "2025-10-01 03:25:48.404448", + "step": 3789, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.444579", + "step": 3789, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01982584223151207, + "timestamp": "2025-10-01 03:25:48.454812", + "step": 3790, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.495114", + "step": 3790, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008917675353586674, + "timestamp": "2025-10-01 03:25:48.499115", + "step": 3791, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.546553", + "step": 3791, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02771948091685772, + "timestamp": "2025-10-01 03:25:48.571460", + "step": 3792, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:48.608298", + "step": 3792, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02449604496359825, + "timestamp": "2025-10-01 03:25:48.616753", + "step": 3793, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.652192", + "step": 3793, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0047488100826740265, + "timestamp": "2025-10-01 03:25:48.658794", + "step": 3794, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.697439", + "step": 3794, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005752161145210266, + "timestamp": "2025-10-01 03:25:48.704020", + "step": 3795, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:48.739082", + "step": 3795, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020761548075824976, + "timestamp": "2025-10-01 03:25:48.767872", + "step": 3796, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:48.807245", + "step": 3796, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02969350293278694, + "timestamp": "2025-10-01 03:25:48.816412", + "step": 3797, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:25:48.853998", + "step": 3797, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005797964986413717, + "timestamp": "2025-10-01 03:25:48.857579", + "step": 3798, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:48.901794", + "step": 3798, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00972217321395874, + "timestamp": "2025-10-01 03:25:48.905035", + "step": 3799, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:48.939248", + "step": 3799, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004076676443219185, + "timestamp": "2025-10-01 03:25:48.968817", + "step": 3800, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:25:51.262147", + "step": 3800, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2193423.7611755864, + "timestamp": "2025-10-01 03:25:51.264552", + "step": 3800, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.293736", + "step": 3800, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02527899481356144, + "timestamp": "2025-10-01 03:25:51.295871", + "step": 3801, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.326125", + "step": 3801, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008061420172452927, + "timestamp": "2025-10-01 03:25:51.328846", + "step": 3802, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:51.359187", + "step": 3802, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012881922535598278, + "timestamp": "2025-10-01 03:25:51.361580", + "step": 3803, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.394472", + "step": 3803, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026893919333815575, + "timestamp": "2025-10-01 03:25:51.418788", + "step": 3804, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.449219", + "step": 3804, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04573490098118782, + "timestamp": "2025-10-01 03:25:51.451615", + "step": 3805, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.482742", + "step": 3805, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008198058232665062, + "timestamp": "2025-10-01 03:25:51.484752", + "step": 3806, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:51.514945", + "step": 3806, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002470665145665407, + "timestamp": "2025-10-01 03:25:51.517047", + "step": 3807, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.547257", + "step": 3807, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023864582180976868, + "timestamp": "2025-10-01 03:25:51.571073", + "step": 3808, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.602089", + "step": 3808, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00903993844985962, + "timestamp": "2025-10-01 03:25:51.604522", + "step": 3809, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.636609", + "step": 3809, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008428963832557201, + "timestamp": "2025-10-01 03:25:51.638904", + "step": 3810, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.669617", + "step": 3810, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008772683329880238, + "timestamp": "2025-10-01 03:25:51.671908", + "step": 3811, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.702572", + "step": 3811, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007024561520665884, + "timestamp": "2025-10-01 03:25:51.726422", + "step": 3812, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.757397", + "step": 3812, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008745083585381508, + "timestamp": "2025-10-01 03:25:51.759642", + "step": 3813, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.802677", + "step": 3813, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.036471106112003326, + "timestamp": "2025-10-01 03:25:51.804959", + "step": 3814, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.838038", + "step": 3814, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004394039046019316, + "timestamp": "2025-10-01 03:25:51.840254", + "step": 3815, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:51.871245", + "step": 3815, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.09126897901296616, + "timestamp": "2025-10-01 03:25:51.895153", + "step": 3816, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.925820", + "step": 3816, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06227981671690941, + "timestamp": "2025-10-01 03:25:51.928204", + "step": 3817, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.960878", + "step": 3817, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02347668632864952, + "timestamp": "2025-10-01 03:25:51.963273", + "step": 3818, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:51.995918", + "step": 3818, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03446899726986885, + "timestamp": "2025-10-01 03:25:51.998178", + "step": 3819, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.030504", + "step": 3819, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002204354852437973, + "timestamp": "2025-10-01 03:25:52.054157", + "step": 3820, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.085100", + "step": 3820, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005948849022388458, + "timestamp": "2025-10-01 03:25:52.087308", + "step": 3821, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:52.121070", + "step": 3821, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004617323633283377, + "timestamp": "2025-10-01 03:25:52.125101", + "step": 3822, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.157609", + "step": 3822, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034059237223118544, + "timestamp": "2025-10-01 03:25:52.159868", + "step": 3823, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.190961", + "step": 3823, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008942323736846447, + "timestamp": "2025-10-01 03:25:52.215499", + "step": 3824, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.246920", + "step": 3824, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023353216238319874, + "timestamp": "2025-10-01 03:25:52.249093", + "step": 3825, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.279244", + "step": 3825, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032486910931766033, + "timestamp": "2025-10-01 03:25:52.282241", + "step": 3826, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.314377", + "step": 3826, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009845641441643238, + "timestamp": "2025-10-01 03:25:52.317373", + "step": 3827, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.349542", + "step": 3827, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013387949205935001, + "timestamp": "2025-10-01 03:25:52.374086", + "step": 3828, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.405041", + "step": 3828, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04304369166493416, + "timestamp": "2025-10-01 03:25:52.407638", + "step": 3829, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.439159", + "step": 3829, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013226264854893088, + "timestamp": "2025-10-01 03:25:52.442063", + "step": 3830, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.473798", + "step": 3830, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020136127714067698, + "timestamp": "2025-10-01 03:25:52.476689", + "step": 3831, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:52.508365", + "step": 3831, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014130047522485256, + "timestamp": "2025-10-01 03:25:52.533106", + "step": 3832, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.565586", + "step": 3832, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038181396666914225, + "timestamp": "2025-10-01 03:25:52.568649", + "step": 3833, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.601586", + "step": 3833, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005287017207592726, + "timestamp": "2025-10-01 03:25:52.604774", + "step": 3834, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.637687", + "step": 3834, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008395767770707607, + "timestamp": "2025-10-01 03:25:52.640806", + "step": 3835, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.672203", + "step": 3835, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008029984892345965, + "timestamp": "2025-10-01 03:25:52.697260", + "step": 3836, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.728099", + "step": 3836, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003277746494859457, + "timestamp": "2025-10-01 03:25:52.731897", + "step": 3837, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:52.764554", + "step": 3837, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011351080611348152, + "timestamp": "2025-10-01 03:25:52.767315", + "step": 3838, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.798727", + "step": 3838, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003831976791843772, + "timestamp": "2025-10-01 03:25:52.801685", + "step": 3839, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.833017", + "step": 3839, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007357203867286444, + "timestamp": "2025-10-01 03:25:52.857260", + "step": 3840, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.888485", + "step": 3840, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013001062907278538, + "timestamp": "2025-10-01 03:25:52.891403", + "step": 3841, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:52.922999", + "step": 3841, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019971453584730625, + "timestamp": "2025-10-01 03:25:52.925770", + "step": 3842, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.957896", + "step": 3842, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005691169295459986, + "timestamp": "2025-10-01 03:25:52.960579", + "step": 3843, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:52.992540", + "step": 3843, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012828148901462555, + "timestamp": "2025-10-01 03:25:53.016935", + "step": 3844, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:53.049219", + "step": 3844, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021598737686872482, + "timestamp": "2025-10-01 03:25:53.051854", + "step": 3845, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.084355", + "step": 3845, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011389302089810371, + "timestamp": "2025-10-01 03:25:53.088339", + "step": 3846, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.121241", + "step": 3846, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02478250302374363, + "timestamp": "2025-10-01 03:25:53.124211", + "step": 3847, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.158725", + "step": 3847, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013563251122832298, + "timestamp": "2025-10-01 03:25:53.183118", + "step": 3848, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:53.216115", + "step": 3848, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005215187091380358, + "timestamp": "2025-10-01 03:25:53.220162", + "step": 3849, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.252812", + "step": 3849, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015450985170900822, + "timestamp": "2025-10-01 03:25:53.256639", + "step": 3850, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.289450", + "step": 3850, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011858361773192883, + "timestamp": "2025-10-01 03:25:53.292444", + "step": 3851, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.327027", + "step": 3851, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004040651023387909, + "timestamp": "2025-10-01 03:25:53.351823", + "step": 3852, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.383937", + "step": 3852, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013513057492673397, + "timestamp": "2025-10-01 03:25:53.387091", + "step": 3853, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.418853", + "step": 3853, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001955266809090972, + "timestamp": "2025-10-01 03:25:53.423157", + "step": 3854, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.457267", + "step": 3854, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001485570683144033, + "timestamp": "2025-10-01 03:25:53.460322", + "step": 3855, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.492666", + "step": 3855, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003042319556698203, + "timestamp": "2025-10-01 03:25:53.517469", + "step": 3856, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.549319", + "step": 3856, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00543354544788599, + "timestamp": "2025-10-01 03:25:53.551912", + "step": 3857, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.583275", + "step": 3857, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002211845712736249, + "timestamp": "2025-10-01 03:25:53.586632", + "step": 3858, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:53.618475", + "step": 3858, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05411337688565254, + "timestamp": "2025-10-01 03:25:53.621436", + "step": 3859, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.653984", + "step": 3859, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007920268690213561, + "timestamp": "2025-10-01 03:25:53.678334", + "step": 3860, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.710778", + "step": 3860, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013811877928674221, + "timestamp": "2025-10-01 03:25:53.712980", + "step": 3861, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.744169", + "step": 3861, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03952335938811302, + "timestamp": "2025-10-01 03:25:53.747190", + "step": 3862, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.778888", + "step": 3862, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008440734818577766, + "timestamp": "2025-10-01 03:25:53.781739", + "step": 3863, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.812996", + "step": 3863, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014870602171868086, + "timestamp": "2025-10-01 03:25:53.837447", + "step": 3864, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.869814", + "step": 3864, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00468066381290555, + "timestamp": "2025-10-01 03:25:53.872072", + "step": 3865, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:53.902404", + "step": 3865, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019319983199238777, + "timestamp": "2025-10-01 03:25:53.904981", + "step": 3866, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.935836", + "step": 3866, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009435190004296601, + "timestamp": "2025-10-01 03:25:53.937978", + "step": 3867, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:53.970206", + "step": 3867, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007233875803649426, + "timestamp": "2025-10-01 03:25:53.993928", + "step": 3868, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.025482", + "step": 3868, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012202771380543709, + "timestamp": "2025-10-01 03:25:54.027873", + "step": 3869, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.060375", + "step": 3869, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035857746843248606, + "timestamp": "2025-10-01 03:25:54.065117", + "step": 3870, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.098973", + "step": 3870, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021556352730840445, + "timestamp": "2025-10-01 03:25:54.103518", + "step": 3871, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.136942", + "step": 3871, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023254988715052605, + "timestamp": "2025-10-01 03:25:54.160994", + "step": 3872, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.192506", + "step": 3872, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008402751991525292, + "timestamp": "2025-10-01 03:25:54.196283", + "step": 3873, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.228458", + "step": 3873, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005609482177533209, + "timestamp": "2025-10-01 03:25:54.231280", + "step": 3874, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.263098", + "step": 3874, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006870602373965085, + "timestamp": "2025-10-01 03:25:54.265364", + "step": 3875, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.296638", + "step": 3875, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012054344406351447, + "timestamp": "2025-10-01 03:25:54.320818", + "step": 3876, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:54.352178", + "step": 3876, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001986431423574686, + "timestamp": "2025-10-01 03:25:54.354714", + "step": 3877, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.385720", + "step": 3877, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04699980467557907, + "timestamp": "2025-10-01 03:25:54.388491", + "step": 3878, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.419891", + "step": 3878, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04703683778643608, + "timestamp": "2025-10-01 03:25:54.421889", + "step": 3879, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.452985", + "step": 3879, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003931990941055119, + "timestamp": "2025-10-01 03:25:54.479249", + "step": 3880, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.510282", + "step": 3880, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03931304067373276, + "timestamp": "2025-10-01 03:25:54.512567", + "step": 3881, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:54.543472", + "step": 3881, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009573386632837355, + "timestamp": "2025-10-01 03:25:54.545712", + "step": 3882, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:54.577077", + "step": 3882, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003937209490686655, + "timestamp": "2025-10-01 03:25:54.579822", + "step": 3883, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.611057", + "step": 3883, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005670327809639275, + "timestamp": "2025-10-01 03:25:54.638294", + "step": 3884, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.671451", + "step": 3884, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05478212982416153, + "timestamp": "2025-10-01 03:25:54.673752", + "step": 3885, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.704920", + "step": 3885, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007473878096789122, + "timestamp": "2025-10-01 03:25:54.707562", + "step": 3886, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.738133", + "step": 3886, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007263060659170151, + "timestamp": "2025-10-01 03:25:54.740940", + "step": 3887, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.778312", + "step": 3887, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010083552915602922, + "timestamp": "2025-10-01 03:25:54.802351", + "step": 3888, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.841493", + "step": 3888, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009214082383550704, + "timestamp": "2025-10-01 03:25:54.843977", + "step": 3889, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.878256", + "step": 3889, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00622709421440959, + "timestamp": "2025-10-01 03:25:54.880510", + "step": 3890, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:54.913929", + "step": 3890, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007368508959189057, + "timestamp": "2025-10-01 03:25:54.916352", + "step": 3891, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:54.947529", + "step": 3891, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023922850377857685, + "timestamp": "2025-10-01 03:25:54.971506", + "step": 3892, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.009094", + "step": 3892, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023762879893183708, + "timestamp": "2025-10-01 03:25:55.011307", + "step": 3893, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.042943", + "step": 3893, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008941550739109516, + "timestamp": "2025-10-01 03:25:55.045017", + "step": 3894, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.077341", + "step": 3894, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016209924360737205, + "timestamp": "2025-10-01 03:25:55.079684", + "step": 3895, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.110478", + "step": 3895, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004493734741117805, + "timestamp": "2025-10-01 03:25:55.134412", + "step": 3896, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:55.165065", + "step": 3896, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017417655908502638, + "timestamp": "2025-10-01 03:25:55.167274", + "step": 3897, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.198711", + "step": 3897, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04938872531056404, + "timestamp": "2025-10-01 03:25:55.206225", + "step": 3898, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.243782", + "step": 3898, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031015430577099323, + "timestamp": "2025-10-01 03:25:55.247298", + "step": 3899, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.279583", + "step": 3899, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013709425926208496, + "timestamp": "2025-10-01 03:25:55.303149", + "step": 3900, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.334530", + "step": 3900, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025282875867560506, + "timestamp": "2025-10-01 03:25:55.336820", + "step": 3901, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:55.368680", + "step": 3901, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05898391455411911, + "timestamp": "2025-10-01 03:25:55.370770", + "step": 3902, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.403565", + "step": 3902, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005065280012786388, + "timestamp": "2025-10-01 03:25:55.406068", + "step": 3903, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.438075", + "step": 3903, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018319033551961184, + "timestamp": "2025-10-01 03:25:55.462170", + "step": 3904, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.497027", + "step": 3904, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009416757384315133, + "timestamp": "2025-10-01 03:25:55.500390", + "step": 3905, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.535536", + "step": 3905, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011747373268008232, + "timestamp": "2025-10-01 03:25:55.537896", + "step": 3906, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.568854", + "step": 3906, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007602805271744728, + "timestamp": "2025-10-01 03:25:55.571479", + "step": 3907, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:55.603024", + "step": 3907, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08483539521694183, + "timestamp": "2025-10-01 03:25:55.626660", + "step": 3908, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.669731", + "step": 3908, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03134254366159439, + "timestamp": "2025-10-01 03:25:55.671990", + "step": 3909, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.703899", + "step": 3909, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006822552997618914, + "timestamp": "2025-10-01 03:25:55.705995", + "step": 3910, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.738261", + "step": 3910, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008914990350604057, + "timestamp": "2025-10-01 03:25:55.740643", + "step": 3911, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:55.771773", + "step": 3911, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008859005174599588, + "timestamp": "2025-10-01 03:25:55.795589", + "step": 3912, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.827115", + "step": 3912, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005231828428804874, + "timestamp": "2025-10-01 03:25:55.829709", + "step": 3913, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:55.861843", + "step": 3913, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00625985860824585, + "timestamp": "2025-10-01 03:25:55.864108", + "step": 3914, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.895881", + "step": 3914, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005497562815435231, + "timestamp": "2025-10-01 03:25:55.898059", + "step": 3915, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.928895", + "step": 3915, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022743327543139458, + "timestamp": "2025-10-01 03:25:55.952552", + "step": 3916, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:55.984270", + "step": 3916, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006264414405450225, + "timestamp": "2025-10-01 03:25:55.986358", + "step": 3917, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.021119", + "step": 3917, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006860832683742046, + "timestamp": "2025-10-01 03:25:56.023299", + "step": 3918, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.054936", + "step": 3918, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005669092060998082, + "timestamp": "2025-10-01 03:25:56.057203", + "step": 3919, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:56.089199", + "step": 3919, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04185239598155022, + "timestamp": "2025-10-01 03:25:56.113526", + "step": 3920, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.144691", + "step": 3920, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03954973816871643, + "timestamp": "2025-10-01 03:25:56.147539", + "step": 3921, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.180245", + "step": 3921, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0868050828576088, + "timestamp": "2025-10-01 03:25:56.182780", + "step": 3922, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.214023", + "step": 3922, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014833689201623201, + "timestamp": "2025-10-01 03:25:56.221873", + "step": 3923, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:56.255195", + "step": 3923, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001120955916121602, + "timestamp": "2025-10-01 03:25:56.279275", + "step": 3924, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.311708", + "step": 3924, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021628376096487045, + "timestamp": "2025-10-01 03:25:56.313963", + "step": 3925, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.349076", + "step": 3925, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013002762570977211, + "timestamp": "2025-10-01 03:25:56.351085", + "step": 3926, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.381791", + "step": 3926, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04031941294670105, + "timestamp": "2025-10-01 03:25:56.383872", + "step": 3927, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:56.415070", + "step": 3927, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031476959120482206, + "timestamp": "2025-10-01 03:25:56.438858", + "step": 3928, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.470445", + "step": 3928, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012218542397022247, + "timestamp": "2025-10-01 03:25:56.472650", + "step": 3929, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.503568", + "step": 3929, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01983296498656273, + "timestamp": "2025-10-01 03:25:56.505778", + "step": 3930, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.536285", + "step": 3930, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016835188725963235, + "timestamp": "2025-10-01 03:25:56.538492", + "step": 3931, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.569399", + "step": 3931, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012387685710564256, + "timestamp": "2025-10-01 03:25:56.593373", + "step": 3932, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.624189", + "step": 3932, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01876281201839447, + "timestamp": "2025-10-01 03:25:56.626895", + "step": 3933, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:56.658848", + "step": 3933, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025158815551549196, + "timestamp": "2025-10-01 03:25:56.661355", + "step": 3934, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.693458", + "step": 3934, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0041622924618422985, + "timestamp": "2025-10-01 03:25:56.695642", + "step": 3935, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.728434", + "step": 3935, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001098063075914979, + "timestamp": "2025-10-01 03:25:56.752295", + "step": 3936, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.785181", + "step": 3936, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009100591414608061, + "timestamp": "2025-10-01 03:25:56.787344", + "step": 3937, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.820185", + "step": 3937, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01101985014975071, + "timestamp": "2025-10-01 03:25:56.822836", + "step": 3938, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.853512", + "step": 3938, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002356578828766942, + "timestamp": "2025-10-01 03:25:56.856137", + "step": 3939, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.887447", + "step": 3939, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01099521853029728, + "timestamp": "2025-10-01 03:25:56.911125", + "step": 3940, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.941911", + "step": 3940, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006152810994535685, + "timestamp": "2025-10-01 03:25:56.944430", + "step": 3941, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:56.975084", + "step": 3941, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001344628049992025, + "timestamp": "2025-10-01 03:25:56.977304", + "step": 3942, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.009044", + "step": 3942, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003429247299209237, + "timestamp": "2025-10-01 03:25:57.011518", + "step": 3943, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.042795", + "step": 3943, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019741656724363565, + "timestamp": "2025-10-01 03:25:57.066385", + "step": 3944, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.097993", + "step": 3944, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011194000020623207, + "timestamp": "2025-10-01 03:25:57.100156", + "step": 3945, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.130749", + "step": 3945, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.038976024836301804, + "timestamp": "2025-10-01 03:25:57.133288", + "step": 3946, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:25:57.166371", + "step": 3946, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018252229318022728, + "timestamp": "2025-10-01 03:25:57.168381", + "step": 3947, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.198910", + "step": 3947, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028129300102591515, + "timestamp": "2025-10-01 03:25:57.222825", + "step": 3948, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.255579", + "step": 3948, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.062144629657268524, + "timestamp": "2025-10-01 03:25:57.258628", + "step": 3949, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.289795", + "step": 3949, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030541354790329933, + "timestamp": "2025-10-01 03:25:57.292058", + "step": 3950, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.323297", + "step": 3950, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003587555605918169, + "timestamp": "2025-10-01 03:25:57.325411", + "step": 3951, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:57.357475", + "step": 3951, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006371343624778092, + "timestamp": "2025-10-01 03:25:57.381445", + "step": 3952, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:25:59.563203", + "step": 3952, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2454627.7853308045, + "timestamp": "2025-10-01 03:25:59.565467", + "step": 3952, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.595501", + "step": 3952, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018605778459459543, + "timestamp": "2025-10-01 03:25:59.597872", + "step": 3953, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.628500", + "step": 3953, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006384612061083317, + "timestamp": "2025-10-01 03:25:59.630630", + "step": 3954, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.661251", + "step": 3954, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006748204585164785, + "timestamp": "2025-10-01 03:25:59.664636", + "step": 3955, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.696248", + "step": 3955, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03574059158563614, + "timestamp": "2025-10-01 03:25:59.720153", + "step": 3956, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.756014", + "step": 3956, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012310275807976723, + "timestamp": "2025-10-01 03:25:59.758359", + "step": 3957, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.789199", + "step": 3957, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003594395937398076, + "timestamp": "2025-10-01 03:25:59.791335", + "step": 3958, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:25:59.823852", + "step": 3958, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002350627211853862, + "timestamp": "2025-10-01 03:25:59.825878", + "step": 3959, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.856868", + "step": 3959, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007026888430118561, + "timestamp": "2025-10-01 03:25:59.880549", + "step": 3960, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.914218", + "step": 3960, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01642121933400631, + "timestamp": "2025-10-01 03:25:59.916574", + "step": 3961, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.948940", + "step": 3961, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05921017751097679, + "timestamp": "2025-10-01 03:25:59.957849", + "step": 3962, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:25:59.990587", + "step": 3962, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.048742618411779404, + "timestamp": "2025-10-01 03:25:59.992764", + "step": 3963, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.024413", + "step": 3963, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03307139128446579, + "timestamp": "2025-10-01 03:26:00.048138", + "step": 3964, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:00.079270", + "step": 3964, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00785102229565382, + "timestamp": "2025-10-01 03:26:00.083645", + "step": 3965, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.116962", + "step": 3965, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002228035358712077, + "timestamp": "2025-10-01 03:26:00.119410", + "step": 3966, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.152227", + "step": 3966, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025069117546081543, + "timestamp": "2025-10-01 03:26:00.154594", + "step": 3967, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:00.190070", + "step": 3967, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020787613466382027, + "timestamp": "2025-10-01 03:26:00.213880", + "step": 3968, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.244696", + "step": 3968, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031320650596171618, + "timestamp": "2025-10-01 03:26:00.247646", + "step": 3969, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.278873", + "step": 3969, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022316692396998405, + "timestamp": "2025-10-01 03:26:00.281052", + "step": 3970, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.311434", + "step": 3970, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002479802817106247, + "timestamp": "2025-10-01 03:26:00.313729", + "step": 3971, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.344252", + "step": 3971, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037620540242642164, + "timestamp": "2025-10-01 03:26:00.368089", + "step": 3972, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.402420", + "step": 3972, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005907042417675257, + "timestamp": "2025-10-01 03:26:00.404639", + "step": 3973, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.435686", + "step": 3973, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00734433950856328, + "timestamp": "2025-10-01 03:26:00.437932", + "step": 3974, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.471351", + "step": 3974, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038933493196964264, + "timestamp": "2025-10-01 03:26:00.475244", + "step": 3975, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:00.506799", + "step": 3975, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009545693174004555, + "timestamp": "2025-10-01 03:26:00.531011", + "step": 3976, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:00.562131", + "step": 3976, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019129324704408646, + "timestamp": "2025-10-01 03:26:00.564352", + "step": 3977, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.594959", + "step": 3977, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015085582854226232, + "timestamp": "2025-10-01 03:26:00.597307", + "step": 3978, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.628295", + "step": 3978, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004181261174380779, + "timestamp": "2025-10-01 03:26:00.630619", + "step": 3979, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.661041", + "step": 3979, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01687845215201378, + "timestamp": "2025-10-01 03:26:00.684813", + "step": 3980, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.719268", + "step": 3980, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011647572973743081, + "timestamp": "2025-10-01 03:26:00.721811", + "step": 3981, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.753817", + "step": 3981, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01142753753811121, + "timestamp": "2025-10-01 03:26:00.758057", + "step": 3982, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.789438", + "step": 3982, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037877414375543594, + "timestamp": "2025-10-01 03:26:00.791459", + "step": 3983, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.822122", + "step": 3983, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001583069795742631, + "timestamp": "2025-10-01 03:26:00.846602", + "step": 3984, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.878090", + "step": 3984, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01224056538194418, + "timestamp": "2025-10-01 03:26:00.880670", + "step": 3985, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:00.912104", + "step": 3985, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020480085164308548, + "timestamp": "2025-10-01 03:26:00.914818", + "step": 3986, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.945995", + "step": 3986, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020953675266355276, + "timestamp": "2025-10-01 03:26:00.948164", + "step": 3987, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:00.979056", + "step": 3987, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002961882622912526, + "timestamp": "2025-10-01 03:26:01.002979", + "step": 3988, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:01.034020", + "step": 3988, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00270765065215528, + "timestamp": "2025-10-01 03:26:01.036318", + "step": 3989, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:01.068250", + "step": 3989, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003880836768075824, + "timestamp": "2025-10-01 03:26:01.070309", + "step": 3990, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:01.101334", + "step": 3990, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005384677555412054, + "timestamp": "2025-10-01 03:26:01.103669", + "step": 3991, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:01.134263", + "step": 3991, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02931239828467369, + "timestamp": "2025-10-01 03:26:01.157993", + "step": 3992, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:01.192309", + "step": 3992, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021260951180011034, + "timestamp": "2025-10-01 03:26:01.194444", + "step": 3993, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:01.226342", + "step": 3993, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01947905123233795, + "timestamp": "2025-10-01 03:26:01.229426", + "step": 3994, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:01.261638", + "step": 3994, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02777544967830181, + "timestamp": "2025-10-01 03:26:01.264762", + "step": 3995, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:01.295597", + "step": 3995, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032576103694736958, + "timestamp": "2025-10-01 03:26:01.319191", + "step": 3996, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:01.351393", + "step": 3996, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002629687311127782, + "timestamp": "2025-10-01 03:26:01.353617", + "step": 3997, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:01.384036", + "step": 3997, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006561639718711376, + "timestamp": "2025-10-01 03:26:01.387198", + "step": 3998, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:01.419371", + "step": 3998, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013400159776210785, + "timestamp": "2025-10-01 03:26:01.421469", + "step": 3999, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-10-01 03:26:01.453072", + "step": 3999, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00547891017049551, + "timestamp": "2025-10-01 03:26:01.477825", + "step": 4000, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 4000", + "timestamp": "2025-10-01 03:26:06.272567", + "step": 4000, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:06.316424", + "step": 4000, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03606186434626579, + "timestamp": "2025-10-01 03:26:06.321125", + "step": 4001, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.354200", + "step": 4001, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03156668692827225, + "timestamp": "2025-10-01 03:26:06.358535", + "step": 4002, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.399473", + "step": 4002, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001810655347071588, + "timestamp": "2025-10-01 03:26:06.412780", + "step": 4003, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.455677", + "step": 4003, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001877355738542974, + "timestamp": "2025-10-01 03:26:06.488545", + "step": 4004, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.536112", + "step": 4004, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009281349368393421, + "timestamp": "2025-10-01 03:26:06.540790", + "step": 4005, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.584355", + "step": 4005, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004276403225958347, + "timestamp": "2025-10-01 03:26:06.594977", + "step": 4006, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.630397", + "step": 4006, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021669073030352592, + "timestamp": "2025-10-01 03:26:06.642115", + "step": 4007, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.688085", + "step": 4007, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004126495216041803, + "timestamp": "2025-10-01 03:26:06.726805", + "step": 4008, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:06.761785", + "step": 4008, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.051268111914396286, + "timestamp": "2025-10-01 03:26:06.765384", + "step": 4009, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:06.807084", + "step": 4009, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029096489772200584, + "timestamp": "2025-10-01 03:26:06.815640", + "step": 4010, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.857257", + "step": 4010, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06834260374307632, + "timestamp": "2025-10-01 03:26:06.868635", + "step": 4011, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.914851", + "step": 4011, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03979070857167244, + "timestamp": "2025-10-01 03:26:06.950866", + "step": 4012, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:06.994007", + "step": 4012, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05523044615983963, + "timestamp": "2025-10-01 03:26:07.008702", + "step": 4013, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:07.055183", + "step": 4013, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012051734374836087, + "timestamp": "2025-10-01 03:26:07.070971", + "step": 4014, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:07.115240", + "step": 4014, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017355123534798622, + "timestamp": "2025-10-01 03:26:07.130727", + "step": 4015, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.171669", + "step": 4015, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027306048199534416, + "timestamp": "2025-10-01 03:26:07.205979", + "step": 4016, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.242667", + "step": 4016, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003068166086450219, + "timestamp": "2025-10-01 03:26:07.251263", + "step": 4017, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.289274", + "step": 4017, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.053040772676467896, + "timestamp": "2025-10-01 03:26:07.299971", + "step": 4018, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.340398", + "step": 4018, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024685191456228495, + "timestamp": "2025-10-01 03:26:07.349277", + "step": 4019, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.388228", + "step": 4019, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02554498426616192, + "timestamp": "2025-10-01 03:26:07.414749", + "step": 4020, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.447685", + "step": 4020, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024050692096352577, + "timestamp": "2025-10-01 03:26:07.452545", + "step": 4021, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:07.492470", + "step": 4021, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04476050287485123, + "timestamp": "2025-10-01 03:26:07.496568", + "step": 4022, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.528840", + "step": 4022, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.039506398141384125, + "timestamp": "2025-10-01 03:26:07.532972", + "step": 4023, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:07.565180", + "step": 4023, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007668409496545792, + "timestamp": "2025-10-01 03:26:07.590366", + "step": 4024, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.624340", + "step": 4024, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009350958280265331, + "timestamp": "2025-10-01 03:26:07.629006", + "step": 4025, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.663237", + "step": 4025, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028601020574569702, + "timestamp": "2025-10-01 03:26:07.668811", + "step": 4026, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.703910", + "step": 4026, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026026921346783638, + "timestamp": "2025-10-01 03:26:07.708697", + "step": 4027, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.744098", + "step": 4027, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022417839616537094, + "timestamp": "2025-10-01 03:26:07.771539", + "step": 4028, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.806244", + "step": 4028, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012936959974467754, + "timestamp": "2025-10-01 03:26:07.812400", + "step": 4029, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.846263", + "step": 4029, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028803203254938126, + "timestamp": "2025-10-01 03:26:07.851747", + "step": 4030, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:07.885599", + "step": 4030, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05200082063674927, + "timestamp": "2025-10-01 03:26:07.892182", + "step": 4031, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.926631", + "step": 4031, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01175127923488617, + "timestamp": "2025-10-01 03:26:07.952310", + "step": 4032, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:07.989609", + "step": 4032, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010457801632583141, + "timestamp": "2025-10-01 03:26:07.999740", + "step": 4033, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.042918", + "step": 4033, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002561860252171755, + "timestamp": "2025-10-01 03:26:08.053372", + "step": 4034, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:08.090887", + "step": 4034, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01231359038501978, + "timestamp": "2025-10-01 03:26:08.095443", + "step": 4035, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.130246", + "step": 4035, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037587389815598726, + "timestamp": "2025-10-01 03:26:08.156511", + "step": 4036, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:08.191026", + "step": 4036, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003714749589562416, + "timestamp": "2025-10-01 03:26:08.196980", + "step": 4037, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.233703", + "step": 4037, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017547886818647385, + "timestamp": "2025-10-01 03:26:08.236857", + "step": 4038, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.269578", + "step": 4038, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029780694749206305, + "timestamp": "2025-10-01 03:26:08.277749", + "step": 4039, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.322554", + "step": 4039, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06232333928346634, + "timestamp": "2025-10-01 03:26:08.356081", + "step": 4040, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.395051", + "step": 4040, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02560357376933098, + "timestamp": "2025-10-01 03:26:08.405475", + "step": 4041, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.443143", + "step": 4041, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002139816526323557, + "timestamp": "2025-10-01 03:26:08.455042", + "step": 4042, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.493379", + "step": 4042, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007513374090194702, + "timestamp": "2025-10-01 03:26:08.500597", + "step": 4043, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.535325", + "step": 4043, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017363793449476361, + "timestamp": "2025-10-01 03:26:08.561356", + "step": 4044, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.597530", + "step": 4044, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005228831432759762, + "timestamp": "2025-10-01 03:26:08.604181", + "step": 4045, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:08.640475", + "step": 4045, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00902493018656969, + "timestamp": "2025-10-01 03:26:08.645155", + "step": 4046, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.679230", + "step": 4046, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023187363520264626, + "timestamp": "2025-10-01 03:26:08.684351", + "step": 4047, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:08.719415", + "step": 4047, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0394943468272686, + "timestamp": "2025-10-01 03:26:08.744463", + "step": 4048, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.777589", + "step": 4048, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017237650463357568, + "timestamp": "2025-10-01 03:26:08.781393", + "step": 4049, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.817139", + "step": 4049, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01619976945221424, + "timestamp": "2025-10-01 03:26:08.821096", + "step": 4050, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.857677", + "step": 4050, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009304017759859562, + "timestamp": "2025-10-01 03:26:08.865864", + "step": 4051, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.904797", + "step": 4051, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010964928194880486, + "timestamp": "2025-10-01 03:26:08.935742", + "step": 4052, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:08.975679", + "step": 4052, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017745163291692734, + "timestamp": "2025-10-01 03:26:08.980218", + "step": 4053, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.014245", + "step": 4053, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05870097875595093, + "timestamp": "2025-10-01 03:26:09.018373", + "step": 4054, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.052684", + "step": 4054, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011500478722155094, + "timestamp": "2025-10-01 03:26:09.056875", + "step": 4055, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.092930", + "step": 4055, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0049234963953495026, + "timestamp": "2025-10-01 03:26:09.118689", + "step": 4056, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.167829", + "step": 4056, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002480998169630766, + "timestamp": "2025-10-01 03:26:09.172702", + "step": 4057, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.212770", + "step": 4057, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006810827646404505, + "timestamp": "2025-10-01 03:26:09.215982", + "step": 4058, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:09.255843", + "step": 4058, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02388112060725689, + "timestamp": "2025-10-01 03:26:09.259000", + "step": 4059, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.312189", + "step": 4059, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04639556631445885, + "timestamp": "2025-10-01 03:26:09.339006", + "step": 4060, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.379400", + "step": 4060, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004146786872297525, + "timestamp": "2025-10-01 03:26:09.389077", + "step": 4061, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.432054", + "step": 4061, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008343561552464962, + "timestamp": "2025-10-01 03:26:09.439967", + "step": 4062, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.476358", + "step": 4062, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03339153900742531, + "timestamp": "2025-10-01 03:26:09.483266", + "step": 4063, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:09.526059", + "step": 4063, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.042045269161462784, + "timestamp": "2025-10-01 03:26:09.553987", + "step": 4064, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.590069", + "step": 4064, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03932817652821541, + "timestamp": "2025-10-01 03:26:09.595930", + "step": 4065, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.636516", + "step": 4065, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009577085147611797, + "timestamp": "2025-10-01 03:26:09.640671", + "step": 4066, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.674184", + "step": 4066, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022061564959585667, + "timestamp": "2025-10-01 03:26:09.679758", + "step": 4067, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.716831", + "step": 4067, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028211528435349464, + "timestamp": "2025-10-01 03:26:09.743560", + "step": 4068, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.775634", + "step": 4068, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029731623362749815, + "timestamp": "2025-10-01 03:26:09.778810", + "step": 4069, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.811779", + "step": 4069, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012804709374904633, + "timestamp": "2025-10-01 03:26:09.814869", + "step": 4070, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:09.852415", + "step": 4070, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009806185029447079, + "timestamp": "2025-10-01 03:26:09.855265", + "step": 4071, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.889043", + "step": 4071, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0045038312673568726, + "timestamp": "2025-10-01 03:26:09.915348", + "step": 4072, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.948901", + "step": 4072, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02080012857913971, + "timestamp": "2025-10-01 03:26:09.951429", + "step": 4073, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:09.986131", + "step": 4073, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01048140786588192, + "timestamp": "2025-10-01 03:26:09.990045", + "step": 4074, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.022599", + "step": 4074, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009941770695149899, + "timestamp": "2025-10-01 03:26:10.026228", + "step": 4075, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.060698", + "step": 4075, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008620855398476124, + "timestamp": "2025-10-01 03:26:10.089678", + "step": 4076, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.130616", + "step": 4076, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007192046381533146, + "timestamp": "2025-10-01 03:26:10.139165", + "step": 4077, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.175668", + "step": 4077, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012809520587325096, + "timestamp": "2025-10-01 03:26:10.179067", + "step": 4078, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.212303", + "step": 4078, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029741209000349045, + "timestamp": "2025-10-01 03:26:10.215154", + "step": 4079, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.254980", + "step": 4079, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01921754702925682, + "timestamp": "2025-10-01 03:26:10.280137", + "step": 4080, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.312923", + "step": 4080, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008182207122445107, + "timestamp": "2025-10-01 03:26:10.316783", + "step": 4081, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:10.352956", + "step": 4081, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004736381117254496, + "timestamp": "2025-10-01 03:26:10.359050", + "step": 4082, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.392962", + "step": 4082, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006989248562604189, + "timestamp": "2025-10-01 03:26:10.399016", + "step": 4083, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.432835", + "step": 4083, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013985030353069305, + "timestamp": "2025-10-01 03:26:10.458806", + "step": 4084, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.492355", + "step": 4084, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004273655824363232, + "timestamp": "2025-10-01 03:26:10.498057", + "step": 4085, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.533137", + "step": 4085, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01692543737590313, + "timestamp": "2025-10-01 03:26:10.535472", + "step": 4086, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.568840", + "step": 4086, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02413303032517433, + "timestamp": "2025-10-01 03:26:10.573192", + "step": 4087, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.606724", + "step": 4087, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002516580279916525, + "timestamp": "2025-10-01 03:26:10.636971", + "step": 4088, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.676337", + "step": 4088, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06030198559165001, + "timestamp": "2025-10-01 03:26:10.686397", + "step": 4089, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:10.726409", + "step": 4089, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003208654699847102, + "timestamp": "2025-10-01 03:26:10.734203", + "step": 4090, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.769887", + "step": 4090, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0071142432279884815, + "timestamp": "2025-10-01 03:26:10.775100", + "step": 4091, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.808918", + "step": 4091, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03388933837413788, + "timestamp": "2025-10-01 03:26:10.835791", + "step": 4092, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.871779", + "step": 4092, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022908655926585197, + "timestamp": "2025-10-01 03:26:10.878065", + "step": 4093, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:10.913123", + "step": 4093, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005498480051755905, + "timestamp": "2025-10-01 03:26:10.919591", + "step": 4094, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:10.955819", + "step": 4094, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006570687051862478, + "timestamp": "2025-10-01 03:26:10.963681", + "step": 4095, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:11.000350", + "step": 4095, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01754389889538288, + "timestamp": "2025-10-01 03:26:11.028847", + "step": 4096, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:11.061494", + "step": 4096, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014572429470717907, + "timestamp": "2025-10-01 03:26:11.066922", + "step": 4097, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:11.103968", + "step": 4097, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005259990692138672, + "timestamp": "2025-10-01 03:26:11.109138", + "step": 4098, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:11.146675", + "step": 4098, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007367550861090422, + "timestamp": "2025-10-01 03:26:11.149555", + "step": 4099, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:11.183660", + "step": 4099, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017121804878115654, + "timestamp": "2025-10-01 03:26:11.213784", + "step": 4100, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:11.255855", + "step": 4100, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005537052056752145, + "timestamp": "2025-10-01 03:26:11.267887", + "step": 4101, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:11.309466", + "step": 4101, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01701504737138748, + "timestamp": "2025-10-01 03:26:11.321219", + "step": 4102, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:11.361778", + "step": 4102, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006614282727241516, + "timestamp": "2025-10-01 03:26:11.370537", + "step": 4103, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:11.407584", + "step": 4103, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00594352837651968, + "timestamp": "2025-10-01 03:26:11.436695", + "step": 4104, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:26:14.323559", + "step": 4104, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2326559.453892907, + "timestamp": "2025-10-01 03:26:14.335248", + "step": 4104, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:14.372287", + "step": 4104, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044030966819263995, + "timestamp": "2025-10-01 03:26:14.376097", + "step": 4105, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:14.418996", + "step": 4105, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011308628134429455, + "timestamp": "2025-10-01 03:26:14.423927", + "step": 4106, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:14.460289", + "step": 4106, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009432454826310277, + "timestamp": "2025-10-01 03:26:14.473757", + "step": 4107, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:14.516766", + "step": 4107, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022987565025687218, + "timestamp": "2025-10-01 03:26:14.541132", + "step": 4108, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:14.581886", + "step": 4108, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.052241288125514984, + "timestamp": "2025-10-01 03:26:14.593287", + "step": 4109, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:14.632831", + "step": 4109, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009402628056704998, + "timestamp": "2025-10-01 03:26:14.642511", + "step": 4110, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:14.684391", + "step": 4110, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009956629946827888, + "timestamp": "2025-10-01 03:26:14.694218", + "step": 4111, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:14.735542", + "step": 4111, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030783284455537796, + "timestamp": "2025-10-01 03:26:14.767826", + "step": 4112, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:14.810035", + "step": 4112, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008501344127580523, + "timestamp": "2025-10-01 03:26:14.820255", + "step": 4113, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:14.862386", + "step": 4113, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004687561187893152, + "timestamp": "2025-10-01 03:26:14.872064", + "step": 4114, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:14.911550", + "step": 4114, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029380422085523605, + "timestamp": "2025-10-01 03:26:14.919358", + "step": 4115, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:14.956248", + "step": 4115, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011121859773993492, + "timestamp": "2025-10-01 03:26:14.986030", + "step": 4116, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.022418", + "step": 4116, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02533290721476078, + "timestamp": "2025-10-01 03:26:15.031493", + "step": 4117, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:15.070207", + "step": 4117, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007660826668143272, + "timestamp": "2025-10-01 03:26:15.077313", + "step": 4118, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.117307", + "step": 4118, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004994208458811045, + "timestamp": "2025-10-01 03:26:15.125839", + "step": 4119, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.163172", + "step": 4119, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01217107754200697, + "timestamp": "2025-10-01 03:26:15.193870", + "step": 4120, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.231437", + "step": 4120, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00935297179967165, + "timestamp": "2025-10-01 03:26:15.243212", + "step": 4121, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.283839", + "step": 4121, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007490543066523969, + "timestamp": "2025-10-01 03:26:15.295579", + "step": 4122, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.336534", + "step": 4122, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023110974580049515, + "timestamp": "2025-10-01 03:26:15.339753", + "step": 4123, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.373094", + "step": 4123, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001170689007267356, + "timestamp": "2025-10-01 03:26:15.399558", + "step": 4124, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:15.441023", + "step": 4124, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005821545608341694, + "timestamp": "2025-10-01 03:26:15.450031", + "step": 4125, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:15.488833", + "step": 4125, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018000256270170212, + "timestamp": "2025-10-01 03:26:15.496499", + "step": 4126, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.537732", + "step": 4126, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027423701249063015, + "timestamp": "2025-10-01 03:26:15.541512", + "step": 4127, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.575084", + "step": 4127, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06202465295791626, + "timestamp": "2025-10-01 03:26:15.605322", + "step": 4128, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:15.644995", + "step": 4128, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001764451153576374, + "timestamp": "2025-10-01 03:26:15.648867", + "step": 4129, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.681832", + "step": 4129, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005349884158931673, + "timestamp": "2025-10-01 03:26:15.693130", + "step": 4130, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.732026", + "step": 4130, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008036891929805279, + "timestamp": "2025-10-01 03:26:15.743766", + "step": 4131, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:15.787708", + "step": 4131, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002309997333213687, + "timestamp": "2025-10-01 03:26:15.816084", + "step": 4132, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:15.852789", + "step": 4132, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016048444667831063, + "timestamp": "2025-10-01 03:26:15.856896", + "step": 4133, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.892825", + "step": 4133, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01970299892127514, + "timestamp": "2025-10-01 03:26:15.896990", + "step": 4134, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.932182", + "step": 4134, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017559007974341512, + "timestamp": "2025-10-01 03:26:15.935346", + "step": 4135, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:15.969611", + "step": 4135, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0182795487344265, + "timestamp": "2025-10-01 03:26:15.997902", + "step": 4136, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.038868", + "step": 4136, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018287993734702468, + "timestamp": "2025-10-01 03:26:16.048288", + "step": 4137, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.087845", + "step": 4137, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005529047339223325, + "timestamp": "2025-10-01 03:26:16.091568", + "step": 4138, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.123845", + "step": 4138, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025416085496544838, + "timestamp": "2025-10-01 03:26:16.126765", + "step": 4139, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.159618", + "step": 4139, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001208851346746087, + "timestamp": "2025-10-01 03:26:16.184530", + "step": 4140, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.216144", + "step": 4140, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011042177211493254, + "timestamp": "2025-10-01 03:26:16.220066", + "step": 4141, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.252174", + "step": 4141, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008090917952358723, + "timestamp": "2025-10-01 03:26:16.254961", + "step": 4142, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:16.286611", + "step": 4142, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004276198800653219, + "timestamp": "2025-10-01 03:26:16.292938", + "step": 4143, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.329133", + "step": 4143, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002711484266910702, + "timestamp": "2025-10-01 03:26:16.357984", + "step": 4144, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.397536", + "step": 4144, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004667733155656606, + "timestamp": "2025-10-01 03:26:16.401305", + "step": 4145, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:16.434789", + "step": 4145, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005456527578644454, + "timestamp": "2025-10-01 03:26:16.439053", + "step": 4146, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:16.473547", + "step": 4146, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035020820796489716, + "timestamp": "2025-10-01 03:26:16.477810", + "step": 4147, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:16.511048", + "step": 4147, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002726638922467828, + "timestamp": "2025-10-01 03:26:16.540499", + "step": 4148, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:16.577607", + "step": 4148, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004158205818384886, + "timestamp": "2025-10-01 03:26:16.583898", + "step": 4149, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.618297", + "step": 4149, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012769008986651897, + "timestamp": "2025-10-01 03:26:16.622980", + "step": 4150, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.656833", + "step": 4150, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011088420869782567, + "timestamp": "2025-10-01 03:26:16.659567", + "step": 4151, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.712950", + "step": 4151, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012986741494387388, + "timestamp": "2025-10-01 03:26:16.738436", + "step": 4152, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:16.781112", + "step": 4152, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012844083830714226, + "timestamp": "2025-10-01 03:26:16.784157", + "step": 4153, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.816867", + "step": 4153, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015355529030784965, + "timestamp": "2025-10-01 03:26:16.819579", + "step": 4154, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.851944", + "step": 4154, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01701490767300129, + "timestamp": "2025-10-01 03:26:16.861581", + "step": 4155, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.904914", + "step": 4155, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016196897253394127, + "timestamp": "2025-10-01 03:26:16.936015", + "step": 4156, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:16.977004", + "step": 4156, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0040773008950054646, + "timestamp": "2025-10-01 03:26:16.985513", + "step": 4157, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:17.022517", + "step": 4157, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007516301237046719, + "timestamp": "2025-10-01 03:26:17.026570", + "step": 4158, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.061610", + "step": 4158, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018678052350878716, + "timestamp": "2025-10-01 03:26:17.070561", + "step": 4159, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.107928", + "step": 4159, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04626429080963135, + "timestamp": "2025-10-01 03:26:17.132421", + "step": 4160, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.165896", + "step": 4160, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037037150468677282, + "timestamp": "2025-10-01 03:26:17.169496", + "step": 4161, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:17.203186", + "step": 4161, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026911557652056217, + "timestamp": "2025-10-01 03:26:17.205796", + "step": 4162, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:17.237114", + "step": 4162, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007127028424292803, + "timestamp": "2025-10-01 03:26:17.241989", + "step": 4163, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.278118", + "step": 4163, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004234790336340666, + "timestamp": "2025-10-01 03:26:17.306621", + "step": 4164, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.342696", + "step": 4164, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00819236971437931, + "timestamp": "2025-10-01 03:26:17.346924", + "step": 4165, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.382607", + "step": 4165, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004787460435181856, + "timestamp": "2025-10-01 03:26:17.388952", + "step": 4166, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:17.424188", + "step": 4166, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011382665252313018, + "timestamp": "2025-10-01 03:26:17.433302", + "step": 4167, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.471316", + "step": 4167, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013810306787490845, + "timestamp": "2025-10-01 03:26:17.501126", + "step": 4168, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:17.535107", + "step": 4168, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004986834246665239, + "timestamp": "2025-10-01 03:26:17.542026", + "step": 4169, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:17.576036", + "step": 4169, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000706730701494962, + "timestamp": "2025-10-01 03:26:17.583123", + "step": 4170, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.618951", + "step": 4170, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021853698417544365, + "timestamp": "2025-10-01 03:26:17.623656", + "step": 4171, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.658598", + "step": 4171, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001225113868713379, + "timestamp": "2025-10-01 03:26:17.684942", + "step": 4172, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:17.718751", + "step": 4172, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010520008392632008, + "timestamp": "2025-10-01 03:26:17.722530", + "step": 4173, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.754852", + "step": 4173, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025274718180298805, + "timestamp": "2025-10-01 03:26:17.772981", + "step": 4174, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.813523", + "step": 4174, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012356170918792486, + "timestamp": "2025-10-01 03:26:17.816631", + "step": 4175, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.849286", + "step": 4175, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002674005809240043, + "timestamp": "2025-10-01 03:26:17.877156", + "step": 4176, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:17.911607", + "step": 4176, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021074561402201653, + "timestamp": "2025-10-01 03:26:17.918685", + "step": 4177, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.952661", + "step": 4177, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013567742425948381, + "timestamp": "2025-10-01 03:26:17.959752", + "step": 4178, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:17.996423", + "step": 4178, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005286911618895829, + "timestamp": "2025-10-01 03:26:18.007847", + "step": 4179, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.047774", + "step": 4179, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007253837771713734, + "timestamp": "2025-10-01 03:26:18.075597", + "step": 4180, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.113094", + "step": 4180, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000722621101886034, + "timestamp": "2025-10-01 03:26:18.121584", + "step": 4181, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.157790", + "step": 4181, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019134658214170486, + "timestamp": "2025-10-01 03:26:18.165271", + "step": 4182, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.207010", + "step": 4182, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007790768868289888, + "timestamp": "2025-10-01 03:26:18.214650", + "step": 4183, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.255540", + "step": 4183, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00889648962765932, + "timestamp": "2025-10-01 03:26:18.281682", + "step": 4184, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.315374", + "step": 4184, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011775549501180649, + "timestamp": "2025-10-01 03:26:18.319853", + "step": 4185, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.353788", + "step": 4185, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03943416848778725, + "timestamp": "2025-10-01 03:26:18.359587", + "step": 4186, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.396536", + "step": 4186, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002061492530629039, + "timestamp": "2025-10-01 03:26:18.401250", + "step": 4187, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:18.434750", + "step": 4187, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04005378857254982, + "timestamp": "2025-10-01 03:26:18.460940", + "step": 4188, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.496862", + "step": 4188, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025475064292550087, + "timestamp": "2025-10-01 03:26:18.502047", + "step": 4189, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.539027", + "step": 4189, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011647654697299004, + "timestamp": "2025-10-01 03:26:18.545412", + "step": 4190, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.583669", + "step": 4190, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021925467997789383, + "timestamp": "2025-10-01 03:26:18.595807", + "step": 4191, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.635806", + "step": 4191, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00244794855825603, + "timestamp": "2025-10-01 03:26:18.661440", + "step": 4192, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.695237", + "step": 4192, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021205104421824217, + "timestamp": "2025-10-01 03:26:18.698732", + "step": 4193, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.733110", + "step": 4193, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006979660247452557, + "timestamp": "2025-10-01 03:26:18.736237", + "step": 4194, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:18.770238", + "step": 4194, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003855173126794398, + "timestamp": "2025-10-01 03:26:18.774969", + "step": 4195, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.808919", + "step": 4195, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010184928542003036, + "timestamp": "2025-10-01 03:26:18.834773", + "step": 4196, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.868388", + "step": 4196, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003582289908081293, + "timestamp": "2025-10-01 03:26:18.873382", + "step": 4197, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.907707", + "step": 4197, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007159894797950983, + "timestamp": "2025-10-01 03:26:18.918664", + "step": 4198, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:18.954930", + "step": 4198, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001213093171827495, + "timestamp": "2025-10-01 03:26:18.963748", + "step": 4199, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.014235", + "step": 4199, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014430281706154346, + "timestamp": "2025-10-01 03:26:19.045809", + "step": 4200, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.081301", + "step": 4200, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010853060521185398, + "timestamp": "2025-10-01 03:26:19.085885", + "step": 4201, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:19.120392", + "step": 4201, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033820581156760454, + "timestamp": "2025-10-01 03:26:19.124619", + "step": 4202, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.159794", + "step": 4202, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005629491060972214, + "timestamp": "2025-10-01 03:26:19.163572", + "step": 4203, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.197442", + "step": 4203, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004564437083899975, + "timestamp": "2025-10-01 03:26:19.222365", + "step": 4204, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.256216", + "step": 4204, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009006226435303688, + "timestamp": "2025-10-01 03:26:19.262909", + "step": 4205, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.298627", + "step": 4205, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009401978924870491, + "timestamp": "2025-10-01 03:26:19.303683", + "step": 4206, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.336342", + "step": 4206, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008715606294572353, + "timestamp": "2025-10-01 03:26:19.342409", + "step": 4207, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.379023", + "step": 4207, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012346671428531408, + "timestamp": "2025-10-01 03:26:19.408343", + "step": 4208, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.443546", + "step": 4208, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01957501471042633, + "timestamp": "2025-10-01 03:26:19.448149", + "step": 4209, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.480934", + "step": 4209, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012803839519619942, + "timestamp": "2025-10-01 03:26:19.492604", + "step": 4210, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.533133", + "step": 4210, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004531282465904951, + "timestamp": "2025-10-01 03:26:19.543137", + "step": 4211, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:19.584779", + "step": 4211, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001314247027039528, + "timestamp": "2025-10-01 03:26:19.614088", + "step": 4212, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.649850", + "step": 4212, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017158976988866925, + "timestamp": "2025-10-01 03:26:19.656290", + "step": 4213, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.692668", + "step": 4213, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005897831870242953, + "timestamp": "2025-10-01 03:26:19.698968", + "step": 4214, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.733788", + "step": 4214, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023331377655267715, + "timestamp": "2025-10-01 03:26:19.739895", + "step": 4215, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.774898", + "step": 4215, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07508482038974762, + "timestamp": "2025-10-01 03:26:19.802281", + "step": 4216, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.838069", + "step": 4216, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003137824358418584, + "timestamp": "2025-10-01 03:26:19.841898", + "step": 4217, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:19.876185", + "step": 4217, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0048927743919193745, + "timestamp": "2025-10-01 03:26:19.885247", + "step": 4218, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.934563", + "step": 4218, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001822038204409182, + "timestamp": "2025-10-01 03:26:19.945766", + "step": 4219, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:19.999709", + "step": 4219, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002658974553924054, + "timestamp": "2025-10-01 03:26:20.026738", + "step": 4220, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:20.065565", + "step": 4220, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004606317379511893, + "timestamp": "2025-10-01 03:26:20.076944", + "step": 4221, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:20.115403", + "step": 4221, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005584527971222997, + "timestamp": "2025-10-01 03:26:20.126696", + "step": 4222, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.166855", + "step": 4222, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009209368145093322, + "timestamp": "2025-10-01 03:26:20.176223", + "step": 4223, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.214018", + "step": 4223, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019907325506210327, + "timestamp": "2025-10-01 03:26:20.244304", + "step": 4224, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.280355", + "step": 4224, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018464801833033562, + "timestamp": "2025-10-01 03:26:20.288148", + "step": 4225, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:20.330523", + "step": 4225, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009612090652808547, + "timestamp": "2025-10-01 03:26:20.341728", + "step": 4226, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.381735", + "step": 4226, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031008042860776186, + "timestamp": "2025-10-01 03:26:20.394496", + "step": 4227, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:20.435242", + "step": 4227, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016168189467862248, + "timestamp": "2025-10-01 03:26:20.473680", + "step": 4228, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.516529", + "step": 4228, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001871555723482743, + "timestamp": "2025-10-01 03:26:20.528712", + "step": 4229, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.569970", + "step": 4229, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01222920697182417, + "timestamp": "2025-10-01 03:26:20.580803", + "step": 4230, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.622575", + "step": 4230, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00041743743349798024, + "timestamp": "2025-10-01 03:26:20.630328", + "step": 4231, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.667311", + "step": 4231, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022196092177182436, + "timestamp": "2025-10-01 03:26:20.700396", + "step": 4232, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.736617", + "step": 4232, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006590009666979313, + "timestamp": "2025-10-01 03:26:20.741841", + "step": 4233, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.777771", + "step": 4233, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010770439403131604, + "timestamp": "2025-10-01 03:26:20.784878", + "step": 4234, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:20.821965", + "step": 4234, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013715310022234917, + "timestamp": "2025-10-01 03:26:20.828426", + "step": 4235, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.863854", + "step": 4235, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014941597357392311, + "timestamp": "2025-10-01 03:26:20.895544", + "step": 4236, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.933714", + "step": 4236, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015468428609892726, + "timestamp": "2025-10-01 03:26:20.940101", + "step": 4237, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:20.975295", + "step": 4237, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017838251078501344, + "timestamp": "2025-10-01 03:26:20.981258", + "step": 4238, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.016671", + "step": 4238, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016773154959082603, + "timestamp": "2025-10-01 03:26:21.022397", + "step": 4239, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:21.058254", + "step": 4239, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01703275740146637, + "timestamp": "2025-10-01 03:26:21.083022", + "step": 4240, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:21.117774", + "step": 4240, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007660402916371822, + "timestamp": "2025-10-01 03:26:21.124485", + "step": 4241, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.161760", + "step": 4241, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005614692927338183, + "timestamp": "2025-10-01 03:26:21.166157", + "step": 4242, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.199061", + "step": 4242, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006952236290089786, + "timestamp": "2025-10-01 03:26:21.204370", + "step": 4243, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.246847", + "step": 4243, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.043008219450712204, + "timestamp": "2025-10-01 03:26:21.272571", + "step": 4244, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.307064", + "step": 4244, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00046955555444583297, + "timestamp": "2025-10-01 03:26:21.315793", + "step": 4245, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.354872", + "step": 4245, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005826625041663647, + "timestamp": "2025-10-01 03:26:21.363708", + "step": 4246, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.403373", + "step": 4246, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06686720252037048, + "timestamp": "2025-10-01 03:26:21.413474", + "step": 4247, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.454261", + "step": 4247, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015395254828035831, + "timestamp": "2025-10-01 03:26:21.486563", + "step": 4248, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.529398", + "step": 4248, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037640391383320093, + "timestamp": "2025-10-01 03:26:21.543885", + "step": 4249, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.585998", + "step": 4249, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008857092820107937, + "timestamp": "2025-10-01 03:26:21.599965", + "step": 4250, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.641977", + "step": 4250, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03323648124933243, + "timestamp": "2025-10-01 03:26:21.657809", + "step": 4251, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.701834", + "step": 4251, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015119981952011585, + "timestamp": "2025-10-01 03:26:21.739269", + "step": 4252, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.777287", + "step": 4252, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004686181899160147, + "timestamp": "2025-10-01 03:26:21.784949", + "step": 4253, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:21.822665", + "step": 4253, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015640877187252045, + "timestamp": "2025-10-01 03:26:21.832119", + "step": 4254, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:21.872599", + "step": 4254, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005040521267801523, + "timestamp": "2025-10-01 03:26:21.882622", + "step": 4255, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:21.916275", + "step": 4255, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00028925336664542556, + "timestamp": "2025-10-01 03:26:21.942151", + "step": 4256, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:26:24.382497", + "step": 4256, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2702084.8260652632, + "timestamp": "2025-10-01 03:26:24.389394", + "step": 4256, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.434317", + "step": 4256, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027030829805880785, + "timestamp": "2025-10-01 03:26:24.437672", + "step": 4257, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.471971", + "step": 4257, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00014445588749367744, + "timestamp": "2025-10-01 03:26:24.475783", + "step": 4258, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.508668", + "step": 4258, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004456525493878871, + "timestamp": "2025-10-01 03:26:24.511748", + "step": 4259, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:24.544354", + "step": 4259, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010070526972413063, + "timestamp": "2025-10-01 03:26:24.568750", + "step": 4260, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:24.603134", + "step": 4260, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036957929842174053, + "timestamp": "2025-10-01 03:26:24.610667", + "step": 4261, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.648278", + "step": 4261, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023724439088255167, + "timestamp": "2025-10-01 03:26:24.651172", + "step": 4262, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.684679", + "step": 4262, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006946899462491274, + "timestamp": "2025-10-01 03:26:24.688542", + "step": 4263, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.722411", + "step": 4263, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005977590102702379, + "timestamp": "2025-10-01 03:26:24.746555", + "step": 4264, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.780878", + "step": 4264, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003380929585546255, + "timestamp": "2025-10-01 03:26:24.787051", + "step": 4265, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.833503", + "step": 4265, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04853494092822075, + "timestamp": "2025-10-01 03:26:24.836684", + "step": 4266, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.871043", + "step": 4266, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012739034369587898, + "timestamp": "2025-10-01 03:26:24.874620", + "step": 4267, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.909305", + "step": 4267, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009761219844222069, + "timestamp": "2025-10-01 03:26:24.933356", + "step": 4268, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:24.971387", + "step": 4268, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02575451321899891, + "timestamp": "2025-10-01 03:26:24.973683", + "step": 4269, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.011052", + "step": 4269, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0068969642743468285, + "timestamp": "2025-10-01 03:26:25.013458", + "step": 4270, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.047402", + "step": 4270, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00048716182936914265, + "timestamp": "2025-10-01 03:26:25.049800", + "step": 4271, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.084977", + "step": 4271, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005421128589659929, + "timestamp": "2025-10-01 03:26:25.108609", + "step": 4272, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.143772", + "step": 4272, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018438037484884262, + "timestamp": "2025-10-01 03:26:25.146877", + "step": 4273, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.180345", + "step": 4273, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05084080249071121, + "timestamp": "2025-10-01 03:26:25.182558", + "step": 4274, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.215251", + "step": 4274, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01817874051630497, + "timestamp": "2025-10-01 03:26:25.217412", + "step": 4275, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:25.250208", + "step": 4275, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005470517557114363, + "timestamp": "2025-10-01 03:26:25.274118", + "step": 4276, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.308609", + "step": 4276, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006165810045786202, + "timestamp": "2025-10-01 03:26:25.310847", + "step": 4277, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:25.344969", + "step": 4277, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04453638941049576, + "timestamp": "2025-10-01 03:26:25.347902", + "step": 4278, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:25.382355", + "step": 4278, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006722951075062156, + "timestamp": "2025-10-01 03:26:25.386026", + "step": 4279, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:25.418918", + "step": 4279, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016737347468733788, + "timestamp": "2025-10-01 03:26:25.443637", + "step": 4280, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.483117", + "step": 4280, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006909620715305209, + "timestamp": "2025-10-01 03:26:25.485793", + "step": 4281, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.523297", + "step": 4281, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012206408428028226, + "timestamp": "2025-10-01 03:26:25.526032", + "step": 4282, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.567427", + "step": 4282, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019268250092864037, + "timestamp": "2025-10-01 03:26:25.569880", + "step": 4283, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.607337", + "step": 4283, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010108950082212687, + "timestamp": "2025-10-01 03:26:25.631893", + "step": 4284, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:25.668525", + "step": 4284, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007119496585801244, + "timestamp": "2025-10-01 03:26:25.671656", + "step": 4285, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:25.704034", + "step": 4285, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007992196478880942, + "timestamp": "2025-10-01 03:26:25.706830", + "step": 4286, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.744134", + "step": 4286, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021809080615639687, + "timestamp": "2025-10-01 03:26:25.746869", + "step": 4287, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.784005", + "step": 4287, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005378514179028571, + "timestamp": "2025-10-01 03:26:25.808392", + "step": 4288, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.844089", + "step": 4288, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006971051916480064, + "timestamp": "2025-10-01 03:26:25.847057", + "step": 4289, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.899559", + "step": 4289, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009898328222334385, + "timestamp": "2025-10-01 03:26:25.902646", + "step": 4290, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.940416", + "step": 4290, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003820839920081198, + "timestamp": "2025-10-01 03:26:25.943402", + "step": 4291, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:25.975554", + "step": 4291, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017068292945623398, + "timestamp": "2025-10-01 03:26:25.999928", + "step": 4292, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.037956", + "step": 4292, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0044291457161307335, + "timestamp": "2025-10-01 03:26:26.041385", + "step": 4293, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.076477", + "step": 4293, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011139537673443556, + "timestamp": "2025-10-01 03:26:26.079888", + "step": 4294, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.112916", + "step": 4294, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008397189900279045, + "timestamp": "2025-10-01 03:26:26.115762", + "step": 4295, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.161356", + "step": 4295, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001570134307257831, + "timestamp": "2025-10-01 03:26:26.185653", + "step": 4296, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.230529", + "step": 4296, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007625665981322527, + "timestamp": "2025-10-01 03:26:26.233132", + "step": 4297, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.265755", + "step": 4297, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002558500738814473, + "timestamp": "2025-10-01 03:26:26.269428", + "step": 4298, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.306056", + "step": 4298, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027041833847761154, + "timestamp": "2025-10-01 03:26:26.308967", + "step": 4299, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.342078", + "step": 4299, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025318029802292585, + "timestamp": "2025-10-01 03:26:26.366322", + "step": 4300, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.405616", + "step": 4300, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029780719429254532, + "timestamp": "2025-10-01 03:26:26.408041", + "step": 4301, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.456368", + "step": 4301, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003449033945798874, + "timestamp": "2025-10-01 03:26:26.458661", + "step": 4302, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:26.498509", + "step": 4302, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008866331190802157, + "timestamp": "2025-10-01 03:26:26.501053", + "step": 4303, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.536356", + "step": 4303, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009901409968733788, + "timestamp": "2025-10-01 03:26:26.560240", + "step": 4304, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.597791", + "step": 4304, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002585455309599638, + "timestamp": "2025-10-01 03:26:26.599945", + "step": 4305, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.633372", + "step": 4305, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018701447173953056, + "timestamp": "2025-10-01 03:26:26.635419", + "step": 4306, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.668891", + "step": 4306, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00035415729507803917, + "timestamp": "2025-10-01 03:26:26.673404", + "step": 4307, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:26.714584", + "step": 4307, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016709186136722565, + "timestamp": "2025-10-01 03:26:26.738396", + "step": 4308, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:26.771525", + "step": 4308, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023431938607245684, + "timestamp": "2025-10-01 03:26:26.774600", + "step": 4309, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.806888", + "step": 4309, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005801051389425993, + "timestamp": "2025-10-01 03:26:26.809142", + "step": 4310, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:26.842476", + "step": 4310, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005086303222924471, + "timestamp": "2025-10-01 03:26:26.844428", + "step": 4311, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:26.875515", + "step": 4311, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007512508425861597, + "timestamp": "2025-10-01 03:26:26.899385", + "step": 4312, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.933402", + "step": 4312, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00031101686181500554, + "timestamp": "2025-10-01 03:26:26.935716", + "step": 4313, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:26.968353", + "step": 4313, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00132520473562181, + "timestamp": "2025-10-01 03:26:26.970979", + "step": 4314, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:27.005401", + "step": 4314, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016640351386740804, + "timestamp": "2025-10-01 03:26:27.007485", + "step": 4315, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.048435", + "step": 4315, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003543522208929062, + "timestamp": "2025-10-01 03:26:27.072268", + "step": 4316, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.106572", + "step": 4316, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02794383279979229, + "timestamp": "2025-10-01 03:26:27.108984", + "step": 4317, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.143129", + "step": 4317, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016218017553910613, + "timestamp": "2025-10-01 03:26:27.145202", + "step": 4318, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.180870", + "step": 4318, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000622647232376039, + "timestamp": "2025-10-01 03:26:27.183109", + "step": 4319, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.216265", + "step": 4319, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001052493928000331, + "timestamp": "2025-10-01 03:26:27.240060", + "step": 4320, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.276841", + "step": 4320, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035168297588825226, + "timestamp": "2025-10-01 03:26:27.279195", + "step": 4321, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.317172", + "step": 4321, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031189624220132828, + "timestamp": "2025-10-01 03:26:27.319451", + "step": 4322, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.364456", + "step": 4322, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0370226614177227, + "timestamp": "2025-10-01 03:26:27.366610", + "step": 4323, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:27.402594", + "step": 4323, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004042980261147022, + "timestamp": "2025-10-01 03:26:27.428188", + "step": 4324, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.487081", + "step": 4324, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012937113642692566, + "timestamp": "2025-10-01 03:26:27.489162", + "step": 4325, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.532234", + "step": 4325, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006959652062505484, + "timestamp": "2025-10-01 03:26:27.534630", + "step": 4326, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:27.569977", + "step": 4326, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018587468191981316, + "timestamp": "2025-10-01 03:26:27.572259", + "step": 4327, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.606959", + "step": 4327, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003924480173736811, + "timestamp": "2025-10-01 03:26:27.630520", + "step": 4328, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.662595", + "step": 4328, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010424634674564004, + "timestamp": "2025-10-01 03:26:27.664875", + "step": 4329, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.696563", + "step": 4329, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05029075965285301, + "timestamp": "2025-10-01 03:26:27.698884", + "step": 4330, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.733830", + "step": 4330, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004077300545759499, + "timestamp": "2025-10-01 03:26:27.736378", + "step": 4331, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.769544", + "step": 4331, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005423371214419603, + "timestamp": "2025-10-01 03:26:27.793526", + "step": 4332, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.832765", + "step": 4332, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008058448322117329, + "timestamp": "2025-10-01 03:26:27.835100", + "step": 4333, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.868042", + "step": 4333, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014625758631154895, + "timestamp": "2025-10-01 03:26:27.870271", + "step": 4334, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:27.903092", + "step": 4334, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010371112264692783, + "timestamp": "2025-10-01 03:26:27.905549", + "step": 4335, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.939555", + "step": 4335, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008479369804263115, + "timestamp": "2025-10-01 03:26:27.963560", + "step": 4336, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:27.995359", + "step": 4336, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008384405635297298, + "timestamp": "2025-10-01 03:26:27.997616", + "step": 4337, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.036902", + "step": 4337, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01903858408331871, + "timestamp": "2025-10-01 03:26:28.039092", + "step": 4338, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.072646", + "step": 4338, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004476909060031176, + "timestamp": "2025-10-01 03:26:28.075056", + "step": 4339, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:28.109517", + "step": 4339, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007464429829269648, + "timestamp": "2025-10-01 03:26:28.133304", + "step": 4340, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.165691", + "step": 4340, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001901493058539927, + "timestamp": "2025-10-01 03:26:28.167898", + "step": 4341, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:28.201229", + "step": 4341, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024308735504746437, + "timestamp": "2025-10-01 03:26:28.204492", + "step": 4342, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.235785", + "step": 4342, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009678524802438915, + "timestamp": "2025-10-01 03:26:28.237980", + "step": 4343, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.271508", + "step": 4343, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004505201708525419, + "timestamp": "2025-10-01 03:26:28.295620", + "step": 4344, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:28.330099", + "step": 4344, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00524537917226553, + "timestamp": "2025-10-01 03:26:28.332568", + "step": 4345, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:28.364307", + "step": 4345, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03254011273384094, + "timestamp": "2025-10-01 03:26:28.366500", + "step": 4346, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.399673", + "step": 4346, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0462631918489933, + "timestamp": "2025-10-01 03:26:28.401782", + "step": 4347, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.435852", + "step": 4347, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007818110170774162, + "timestamp": "2025-10-01 03:26:28.459818", + "step": 4348, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.494150", + "step": 4348, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018398717045783997, + "timestamp": "2025-10-01 03:26:28.496993", + "step": 4349, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.531163", + "step": 4349, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002087114844471216, + "timestamp": "2025-10-01 03:26:28.539460", + "step": 4350, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.577445", + "step": 4350, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009305873536504805, + "timestamp": "2025-10-01 03:26:28.579942", + "step": 4351, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.613046", + "step": 4351, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030097296461462975, + "timestamp": "2025-10-01 03:26:28.636615", + "step": 4352, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.669354", + "step": 4352, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003844754071906209, + "timestamp": "2025-10-01 03:26:28.671543", + "step": 4353, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:28.708762", + "step": 4353, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00163925439119339, + "timestamp": "2025-10-01 03:26:28.710831", + "step": 4354, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.742608", + "step": 4354, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008178637363016605, + "timestamp": "2025-10-01 03:26:28.745477", + "step": 4355, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.778665", + "step": 4355, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006328328046947718, + "timestamp": "2025-10-01 03:26:28.802375", + "step": 4356, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.837089", + "step": 4356, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02075173519551754, + "timestamp": "2025-10-01 03:26:28.839298", + "step": 4357, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:28.874741", + "step": 4357, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025727872271090746, + "timestamp": "2025-10-01 03:26:28.876927", + "step": 4358, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.907522", + "step": 4358, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038590047042816877, + "timestamp": "2025-10-01 03:26:28.909856", + "step": 4359, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:28.943063", + "step": 4359, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004559554159641266, + "timestamp": "2025-10-01 03:26:28.966893", + "step": 4360, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.000151", + "step": 4360, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009870928479358554, + "timestamp": "2025-10-01 03:26:29.002907", + "step": 4361, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.036826", + "step": 4361, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012955613201484084, + "timestamp": "2025-10-01 03:26:29.038944", + "step": 4362, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.072887", + "step": 4362, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003231597365811467, + "timestamp": "2025-10-01 03:26:29.075406", + "step": 4363, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.108357", + "step": 4363, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015304312109947205, + "timestamp": "2025-10-01 03:26:29.131976", + "step": 4364, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:29.165678", + "step": 4364, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01578163169324398, + "timestamp": "2025-10-01 03:26:29.168055", + "step": 4365, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.200295", + "step": 4365, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05641874670982361, + "timestamp": "2025-10-01 03:26:29.202589", + "step": 4366, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.239683", + "step": 4366, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019267303869128227, + "timestamp": "2025-10-01 03:26:29.241837", + "step": 4367, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.279610", + "step": 4367, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013006261549890041, + "timestamp": "2025-10-01 03:26:29.303323", + "step": 4368, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.342309", + "step": 4368, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030213145073503256, + "timestamp": "2025-10-01 03:26:29.344656", + "step": 4369, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.381853", + "step": 4369, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004056789446622133, + "timestamp": "2025-10-01 03:26:29.384013", + "step": 4370, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.415696", + "step": 4370, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021262546069920063, + "timestamp": "2025-10-01 03:26:29.417937", + "step": 4371, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.451059", + "step": 4371, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03787709027528763, + "timestamp": "2025-10-01 03:26:29.474779", + "step": 4372, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.509832", + "step": 4372, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024484756868332624, + "timestamp": "2025-10-01 03:26:29.511981", + "step": 4373, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.543119", + "step": 4373, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011206341907382011, + "timestamp": "2025-10-01 03:26:29.545192", + "step": 4374, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.580154", + "step": 4374, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04286631941795349, + "timestamp": "2025-10-01 03:26:29.585369", + "step": 4375, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.620786", + "step": 4375, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02235761284828186, + "timestamp": "2025-10-01 03:26:29.644450", + "step": 4376, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:29.678030", + "step": 4376, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008408193243667483, + "timestamp": "2025-10-01 03:26:29.682059", + "step": 4377, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:29.717842", + "step": 4377, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04060649871826172, + "timestamp": "2025-10-01 03:26:29.721903", + "step": 4378, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.760846", + "step": 4378, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021551089361310005, + "timestamp": "2025-10-01 03:26:29.763178", + "step": 4379, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:29.795592", + "step": 4379, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015996814472600818, + "timestamp": "2025-10-01 03:26:29.822234", + "step": 4380, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:29.860360", + "step": 4380, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03381815925240517, + "timestamp": "2025-10-01 03:26:29.862574", + "step": 4381, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:29.896994", + "step": 4381, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008771023713052273, + "timestamp": "2025-10-01 03:26:29.899297", + "step": 4382, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.948445", + "step": 4382, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004885204252786934, + "timestamp": "2025-10-01 03:26:29.950680", + "step": 4383, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:29.988944", + "step": 4383, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005339428782463074, + "timestamp": "2025-10-01 03:26:30.014061", + "step": 4384, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:30.049036", + "step": 4384, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00944127980619669, + "timestamp": "2025-10-01 03:26:30.051075", + "step": 4385, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.082407", + "step": 4385, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007271267473697662, + "timestamp": "2025-10-01 03:26:30.084669", + "step": 4386, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:30.115867", + "step": 4386, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027800800278782845, + "timestamp": "2025-10-01 03:26:30.118500", + "step": 4387, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:30.153927", + "step": 4387, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010875497944653034, + "timestamp": "2025-10-01 03:26:30.181401", + "step": 4388, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.212582", + "step": 4388, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005482938140630722, + "timestamp": "2025-10-01 03:26:30.214920", + "step": 4389, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.253266", + "step": 4389, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035558007657527924, + "timestamp": "2025-10-01 03:26:30.255557", + "step": 4390, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:30.293594", + "step": 4390, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03773922473192215, + "timestamp": "2025-10-01 03:26:30.295925", + "step": 4391, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.329669", + "step": 4391, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02781137451529503, + "timestamp": "2025-10-01 03:26:30.353440", + "step": 4392, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:30.385037", + "step": 4392, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008161294274032116, + "timestamp": "2025-10-01 03:26:30.387473", + "step": 4393, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.419733", + "step": 4393, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016609933227300644, + "timestamp": "2025-10-01 03:26:30.422869", + "step": 4394, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:30.454476", + "step": 4394, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014075756771489978, + "timestamp": "2025-10-01 03:26:30.456903", + "step": 4395, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.492117", + "step": 4395, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020266668871045113, + "timestamp": "2025-10-01 03:26:30.516466", + "step": 4396, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.549858", + "step": 4396, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020690830424427986, + "timestamp": "2025-10-01 03:26:30.552635", + "step": 4397, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.606107", + "step": 4397, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02121693082153797, + "timestamp": "2025-10-01 03:26:30.609036", + "step": 4398, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.642797", + "step": 4398, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004323144443333149, + "timestamp": "2025-10-01 03:26:30.645886", + "step": 4399, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:30.681674", + "step": 4399, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005290227476507425, + "timestamp": "2025-10-01 03:26:30.706347", + "step": 4400, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.741041", + "step": 4400, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022328374907374382, + "timestamp": "2025-10-01 03:26:30.743967", + "step": 4401, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.780115", + "step": 4401, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012795469723641872, + "timestamp": "2025-10-01 03:26:30.783306", + "step": 4402, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.819544", + "step": 4402, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0191594697535038, + "timestamp": "2025-10-01 03:26:30.821923", + "step": 4403, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.858311", + "step": 4403, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027825557626783848, + "timestamp": "2025-10-01 03:26:30.883583", + "step": 4404, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.918075", + "step": 4404, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012470372021198273, + "timestamp": "2025-10-01 03:26:30.921093", + "step": 4405, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:30.960005", + "step": 4405, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019948823377490044, + "timestamp": "2025-10-01 03:26:30.963230", + "step": 4406, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:31.010667", + "step": 4406, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03724580630660057, + "timestamp": "2025-10-01 03:26:31.013199", + "step": 4407, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:31.044682", + "step": 4407, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032483283430337906, + "timestamp": "2025-10-01 03:26:31.069164", + "step": 4408, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:26:33.709732", + "step": 4408, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2533859.969225505, + "timestamp": "2025-10-01 03:26:33.716528", + "step": 4408, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:33.750236", + "step": 4408, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011089926585555077, + "timestamp": "2025-10-01 03:26:33.752637", + "step": 4409, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:33.783827", + "step": 4409, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014973940327763557, + "timestamp": "2025-10-01 03:26:33.786134", + "step": 4410, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:33.817798", + "step": 4410, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02618636004626751, + "timestamp": "2025-10-01 03:26:33.819955", + "step": 4411, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:33.850580", + "step": 4411, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08856023848056793, + "timestamp": "2025-10-01 03:26:33.874718", + "step": 4412, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:33.916238", + "step": 4412, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012898245826363564, + "timestamp": "2025-10-01 03:26:33.918710", + "step": 4413, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:33.950724", + "step": 4413, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025526046752929688, + "timestamp": "2025-10-01 03:26:33.953125", + "step": 4414, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:33.985096", + "step": 4414, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06084362417459488, + "timestamp": "2025-10-01 03:26:33.987390", + "step": 4415, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.024586", + "step": 4415, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009924081386998296, + "timestamp": "2025-10-01 03:26:34.048438", + "step": 4416, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:34.079062", + "step": 4416, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040253788232803345, + "timestamp": "2025-10-01 03:26:34.081460", + "step": 4417, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:34.115609", + "step": 4417, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020557880401611328, + "timestamp": "2025-10-01 03:26:34.117973", + "step": 4418, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.149453", + "step": 4418, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010801391676068306, + "timestamp": "2025-10-01 03:26:34.151661", + "step": 4419, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.182803", + "step": 4419, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00596443098038435, + "timestamp": "2025-10-01 03:26:34.207028", + "step": 4420, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.242683", + "step": 4420, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007174444384872913, + "timestamp": "2025-10-01 03:26:34.244807", + "step": 4421, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.278786", + "step": 4421, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03316085785627365, + "timestamp": "2025-10-01 03:26:34.280915", + "step": 4422, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.311951", + "step": 4422, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02858801558613777, + "timestamp": "2025-10-01 03:26:34.314243", + "step": 4423, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.358855", + "step": 4423, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006425089202821255, + "timestamp": "2025-10-01 03:26:34.382876", + "step": 4424, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.414782", + "step": 4424, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00794488936662674, + "timestamp": "2025-10-01 03:26:34.417255", + "step": 4425, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.448725", + "step": 4425, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009934204630553722, + "timestamp": "2025-10-01 03:26:34.451034", + "step": 4426, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:34.482770", + "step": 4426, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01213841326534748, + "timestamp": "2025-10-01 03:26:34.485006", + "step": 4427, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.523951", + "step": 4427, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001366705633699894, + "timestamp": "2025-10-01 03:26:34.547810", + "step": 4428, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.580481", + "step": 4428, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010133308824151754, + "timestamp": "2025-10-01 03:26:34.582811", + "step": 4429, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.615060", + "step": 4429, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035334512591362, + "timestamp": "2025-10-01 03:26:34.617378", + "step": 4430, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.650661", + "step": 4430, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006002062000334263, + "timestamp": "2025-10-01 03:26:34.653103", + "step": 4431, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.687839", + "step": 4431, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030804264824837446, + "timestamp": "2025-10-01 03:26:34.711893", + "step": 4432, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.745076", + "step": 4432, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01720755733549595, + "timestamp": "2025-10-01 03:26:34.747429", + "step": 4433, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:34.778373", + "step": 4433, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016725684981793165, + "timestamp": "2025-10-01 03:26:34.780968", + "step": 4434, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.812998", + "step": 4434, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015361365862190723, + "timestamp": "2025-10-01 03:26:34.815434", + "step": 4435, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.854276", + "step": 4435, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019404664635658264, + "timestamp": "2025-10-01 03:26:34.878344", + "step": 4436, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.911156", + "step": 4436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016340551897883415, + "timestamp": "2025-10-01 03:26:34.913429", + "step": 4437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:34.946420", + "step": 4437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006628156173974276, + "timestamp": "2025-10-01 03:26:34.949279", + "step": 4438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:34.981149", + "step": 4438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010568484663963318, + "timestamp": "2025-10-01 03:26:34.983884", + "step": 4439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.018309", + "step": 4439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03351050242781639, + "timestamp": "2025-10-01 03:26:35.042530", + "step": 4440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:35.076210", + "step": 4440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016209760680794716, + "timestamp": "2025-10-01 03:26:35.078672", + "step": 4441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.112671", + "step": 4441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020026424899697304, + "timestamp": "2025-10-01 03:26:35.115127", + "step": 4442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:35.148112", + "step": 4442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004859015345573425, + "timestamp": "2025-10-01 03:26:35.150380", + "step": 4443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.183160", + "step": 4443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012592273764312267, + "timestamp": "2025-10-01 03:26:35.206916", + "step": 4444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.242512", + "step": 4444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009236520156264305, + "timestamp": "2025-10-01 03:26:35.244815", + "step": 4445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:35.289655", + "step": 4445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03174101188778877, + "timestamp": "2025-10-01 03:26:35.292419", + "step": 4446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:35.325093", + "step": 4446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033827254083007574, + "timestamp": "2025-10-01 03:26:35.327586", + "step": 4447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.360048", + "step": 4447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005063436925411224, + "timestamp": "2025-10-01 03:26:35.384205", + "step": 4448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:35.416553", + "step": 4448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007101362105458975, + "timestamp": "2025-10-01 03:26:35.419081", + "step": 4449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:35.465860", + "step": 4449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025707002729177475, + "timestamp": "2025-10-01 03:26:35.468220", + "step": 4450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.507830", + "step": 4450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003953913983423263, + "timestamp": "2025-10-01 03:26:35.510174", + "step": 4451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.555884", + "step": 4451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005894234869629145, + "timestamp": "2025-10-01 03:26:35.579864", + "step": 4452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.618709", + "step": 4452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019808269571512938, + "timestamp": "2025-10-01 03:26:35.620997", + "step": 4453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.651761", + "step": 4453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014332549180835485, + "timestamp": "2025-10-01 03:26:35.653947", + "step": 4454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.684168", + "step": 4454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006503463257104158, + "timestamp": "2025-10-01 03:26:35.686392", + "step": 4455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.719541", + "step": 4455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010182269848883152, + "timestamp": "2025-10-01 03:26:35.743590", + "step": 4456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.777171", + "step": 4456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005163394380360842, + "timestamp": "2025-10-01 03:26:35.779544", + "step": 4457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.810131", + "step": 4457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009599360637366772, + "timestamp": "2025-10-01 03:26:35.812376", + "step": 4458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.855192", + "step": 4458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02348599024116993, + "timestamp": "2025-10-01 03:26:35.858191", + "step": 4459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.889852", + "step": 4459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007305982057005167, + "timestamp": "2025-10-01 03:26:35.914168", + "step": 4460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.956316", + "step": 4460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018545066704973578, + "timestamp": "2025-10-01 03:26:35.958663", + "step": 4461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:35.989345", + "step": 4461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003793039941228926, + "timestamp": "2025-10-01 03:26:35.993157", + "step": 4462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.032290", + "step": 4462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006170319858938456, + "timestamp": "2025-10-01 03:26:36.034648", + "step": 4463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.066436", + "step": 4463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004800801165401936, + "timestamp": "2025-10-01 03:26:36.090447", + "step": 4464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.127758", + "step": 4464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037450917065143585, + "timestamp": "2025-10-01 03:26:36.130646", + "step": 4465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.162153", + "step": 4465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009302626131102443, + "timestamp": "2025-10-01 03:26:36.164583", + "step": 4466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.196371", + "step": 4466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.044753558933734894, + "timestamp": "2025-10-01 03:26:36.199243", + "step": 4467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:36.230436", + "step": 4467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007715749088674784, + "timestamp": "2025-10-01 03:26:36.254429", + "step": 4468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.286863", + "step": 4468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0069264634512364864, + "timestamp": "2025-10-01 03:26:36.289260", + "step": 4469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.322364", + "step": 4469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01066860556602478, + "timestamp": "2025-10-01 03:26:36.324747", + "step": 4470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.358535", + "step": 4470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018713930621743202, + "timestamp": "2025-10-01 03:26:36.361027", + "step": 4471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.391524", + "step": 4471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023830991238355637, + "timestamp": "2025-10-01 03:26:36.415282", + "step": 4472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.452600", + "step": 4472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035858319606631994, + "timestamp": "2025-10-01 03:26:36.454890", + "step": 4473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:36.491258", + "step": 4473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002928414149209857, + "timestamp": "2025-10-01 03:26:36.493895", + "step": 4474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.526805", + "step": 4474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005327022518031299, + "timestamp": "2025-10-01 03:26:36.529510", + "step": 4475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.562806", + "step": 4475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006951329996809363, + "timestamp": "2025-10-01 03:26:36.586948", + "step": 4476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:36.621363", + "step": 4476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02855062671005726, + "timestamp": "2025-10-01 03:26:36.624555", + "step": 4477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.661398", + "step": 4477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012926607392728329, + "timestamp": "2025-10-01 03:26:36.663880", + "step": 4478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.698575", + "step": 4478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06632322818040848, + "timestamp": "2025-10-01 03:26:36.701534", + "step": 4479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:36.736932", + "step": 4479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017674746050033718, + "timestamp": "2025-10-01 03:26:36.760621", + "step": 4480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.795008", + "step": 4480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004559385124593973, + "timestamp": "2025-10-01 03:26:36.797342", + "step": 4481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.828608", + "step": 4481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006855820771306753, + "timestamp": "2025-10-01 03:26:36.830941", + "step": 4482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.861687", + "step": 4482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00030537418206222355, + "timestamp": "2025-10-01 03:26:36.864149", + "step": 4483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.897135", + "step": 4483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005356173031032085, + "timestamp": "2025-10-01 03:26:36.920922", + "step": 4484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.951627", + "step": 4484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01754930429160595, + "timestamp": "2025-10-01 03:26:36.954531", + "step": 4485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:36.988353", + "step": 4485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005438476335257292, + "timestamp": "2025-10-01 03:26:36.990753", + "step": 4486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:37.021245", + "step": 4486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011762200854718685, + "timestamp": "2025-10-01 03:26:37.026218", + "step": 4487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:37.057080", + "step": 4487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001156643033027649, + "timestamp": "2025-10-01 03:26:37.080828", + "step": 4488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:37.111329", + "step": 4488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002818737179040909, + "timestamp": "2025-10-01 03:26:37.113940", + "step": 4489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:37.155664", + "step": 4489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009938294999301434, + "timestamp": "2025-10-01 03:26:37.158317", + "step": 4490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:37.194018", + "step": 4490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00038004457019269466, + "timestamp": "2025-10-01 03:26:37.196517", + "step": 4491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:37.228771", + "step": 4491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018050716607831419, + "timestamp": "2025-10-01 03:26:37.252527", + "step": 4492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:37.291229", + "step": 4492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009923437610268593, + "timestamp": "2025-10-01 03:26:37.293432", + "step": 4493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:37.332400", + "step": 4493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014272789703682065, + "timestamp": "2025-10-01 03:26:37.335013", + "step": 4494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:37.365934", + "step": 4494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003081240283790976, + "timestamp": "2025-10-01 03:26:37.368216", + "step": 4495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:37.402650", + "step": 4495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015936229377985, + "timestamp": "2025-10-01 03:26:37.428648", + "step": 4496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:37.462706", + "step": 4496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007700436282902956, + "timestamp": "2025-10-01 03:26:37.465173", + "step": 4497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:37.498528", + "step": 4497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00211440771818161, + "timestamp": "2025-10-01 03:26:37.500837", + "step": 4498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:37.537687", + "step": 4498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018037527333945036, + "timestamp": "2025-10-01 03:26:37.540335", + "step": 4499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:37.574742", + "step": 4499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0394895002245903, + "timestamp": "2025-10-01 03:26:37.599361", + "step": 4500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 4500", + "timestamp": "2025-10-01 03:26:42.364492", + "step": 4500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.396678", + "step": 4500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00048174679977819324, + "timestamp": "2025-10-01 03:26:42.399332", + "step": 4501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.431737", + "step": 4501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002760383067652583, + "timestamp": "2025-10-01 03:26:42.434206", + "step": 4502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.467705", + "step": 4502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000221631649765186, + "timestamp": "2025-10-01 03:26:42.470507", + "step": 4503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.505162", + "step": 4503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037399716675281525, + "timestamp": "2025-10-01 03:26:42.529092", + "step": 4504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.564324", + "step": 4504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041258204728364944, + "timestamp": "2025-10-01 03:26:42.567134", + "step": 4505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:42.599833", + "step": 4505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00938081182539463, + "timestamp": "2025-10-01 03:26:42.602068", + "step": 4506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.635670", + "step": 4506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008263936266303062, + "timestamp": "2025-10-01 03:26:42.637868", + "step": 4507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.671219", + "step": 4507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02088402770459652, + "timestamp": "2025-10-01 03:26:42.695220", + "step": 4508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.734415", + "step": 4508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03196059539914131, + "timestamp": "2025-10-01 03:26:42.736757", + "step": 4509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.773128", + "step": 4509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013882875442504883, + "timestamp": "2025-10-01 03:26:42.775378", + "step": 4510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:42.812948", + "step": 4510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015280202496796846, + "timestamp": "2025-10-01 03:26:42.815227", + "step": 4511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.857479", + "step": 4511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008030885946936905, + "timestamp": "2025-10-01 03:26:42.881373", + "step": 4512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.913733", + "step": 4512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026413612067699432, + "timestamp": "2025-10-01 03:26:42.915933", + "step": 4513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.951670", + "step": 4513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012898794375360012, + "timestamp": "2025-10-01 03:26:42.954053", + "step": 4514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:42.998382", + "step": 4514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016733407974243164, + "timestamp": "2025-10-01 03:26:43.000905", + "step": 4515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:43.033664", + "step": 4515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0563264824450016, + "timestamp": "2025-10-01 03:26:43.057564", + "step": 4516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.093399", + "step": 4516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00037169159622862935, + "timestamp": "2025-10-01 03:26:43.095618", + "step": 4517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.138317", + "step": 4517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007045071688480675, + "timestamp": "2025-10-01 03:26:43.141151", + "step": 4518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.173777", + "step": 4518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018329151207581162, + "timestamp": "2025-10-01 03:26:43.176769", + "step": 4519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.215418", + "step": 4519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001990864984691143, + "timestamp": "2025-10-01 03:26:43.239175", + "step": 4520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:43.273197", + "step": 4520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003222766681574285, + "timestamp": "2025-10-01 03:26:43.275642", + "step": 4521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.318283", + "step": 4521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004472040105611086, + "timestamp": "2025-10-01 03:26:43.320537", + "step": 4522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.357979", + "step": 4522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015689078718423843, + "timestamp": "2025-10-01 03:26:43.362887", + "step": 4523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:43.402376", + "step": 4523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006082700565457344, + "timestamp": "2025-10-01 03:26:43.426223", + "step": 4524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:43.457603", + "step": 4524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028317075222730637, + "timestamp": "2025-10-01 03:26:43.460576", + "step": 4525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.500651", + "step": 4525, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024431084748357534, + "timestamp": "2025-10-01 03:26:43.503074", + "step": 4526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.534757", + "step": 4526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008576986147090793, + "timestamp": "2025-10-01 03:26:43.537022", + "step": 4527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.568129", + "step": 4527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014227285282686353, + "timestamp": "2025-10-01 03:26:43.592851", + "step": 4528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:43.626359", + "step": 4528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00328523269854486, + "timestamp": "2025-10-01 03:26:43.628759", + "step": 4529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:43.665073", + "step": 4529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00043467519572004676, + "timestamp": "2025-10-01 03:26:43.667412", + "step": 4530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.706304", + "step": 4530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020120317116379738, + "timestamp": "2025-10-01 03:26:43.711024", + "step": 4531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.746011", + "step": 4531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001702168839983642, + "timestamp": "2025-10-01 03:26:43.770100", + "step": 4532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:43.802714", + "step": 4532, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018896478286478668, + "timestamp": "2025-10-01 03:26:43.805227", + "step": 4533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.838332", + "step": 4533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03196444734930992, + "timestamp": "2025-10-01 03:26:43.840883", + "step": 4534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.873102", + "step": 4534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002727848768699914, + "timestamp": "2025-10-01 03:26:43.876106", + "step": 4535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.908101", + "step": 4535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012574937427416444, + "timestamp": "2025-10-01 03:26:43.932098", + "step": 4536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:43.971867", + "step": 4536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006624086294323206, + "timestamp": "2025-10-01 03:26:43.974349", + "step": 4537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.006667", + "step": 4537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007699421839788556, + "timestamp": "2025-10-01 03:26:44.009163", + "step": 4538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:44.052777", + "step": 4538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04954271763563156, + "timestamp": "2025-10-01 03:26:44.055277", + "step": 4539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:44.090353", + "step": 4539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009267634013667703, + "timestamp": "2025-10-01 03:26:44.114253", + "step": 4540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.145053", + "step": 4540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001663715811446309, + "timestamp": "2025-10-01 03:26:44.147297", + "step": 4541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.180161", + "step": 4541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004193740023765713, + "timestamp": "2025-10-01 03:26:44.182493", + "step": 4542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.213049", + "step": 4542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018731204909272492, + "timestamp": "2025-10-01 03:26:44.215401", + "step": 4543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:44.249439", + "step": 4543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022053371649235487, + "timestamp": "2025-10-01 03:26:44.273271", + "step": 4544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.306308", + "step": 4544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00641127722337842, + "timestamp": "2025-10-01 03:26:44.310288", + "step": 4545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.343184", + "step": 4545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.060218535363674164, + "timestamp": "2025-10-01 03:26:44.345522", + "step": 4546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.389299", + "step": 4546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00026747334050014615, + "timestamp": "2025-10-01 03:26:44.391751", + "step": 4547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.423935", + "step": 4547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012027138145640492, + "timestamp": "2025-10-01 03:26:44.448138", + "step": 4548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:44.481567", + "step": 4548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00043509015813469887, + "timestamp": "2025-10-01 03:26:44.483954", + "step": 4549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.528968", + "step": 4549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011592656373977661, + "timestamp": "2025-10-01 03:26:44.531193", + "step": 4550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.566445", + "step": 4550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015219329856336117, + "timestamp": "2025-10-01 03:26:44.568877", + "step": 4551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.603375", + "step": 4551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030720219016075134, + "timestamp": "2025-10-01 03:26:44.627179", + "step": 4552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.660480", + "step": 4552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004829203535337001, + "timestamp": "2025-10-01 03:26:44.662647", + "step": 4553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:44.710148", + "step": 4553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030020500998944044, + "timestamp": "2025-10-01 03:26:44.712481", + "step": 4554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.745502", + "step": 4554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004730469547212124, + "timestamp": "2025-10-01 03:26:44.747777", + "step": 4555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.778783", + "step": 4555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03748083487153053, + "timestamp": "2025-10-01 03:26:44.802513", + "step": 4556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:44.840668", + "step": 4556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00592714874073863, + "timestamp": "2025-10-01 03:26:44.842880", + "step": 4557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.878649", + "step": 4557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002349906280869618, + "timestamp": "2025-10-01 03:26:44.881109", + "step": 4558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.921417", + "step": 4558, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0467384047806263, + "timestamp": "2025-10-01 03:26:44.924500", + "step": 4559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:44.967561", + "step": 4559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011259663151577115, + "timestamp": "2025-10-01 03:26:44.991216", + "step": 4560, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:26:47.599026", + "step": 4560, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2249762.711997045, + "timestamp": "2025-10-01 03:26:47.601433", + "step": 4560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:47.630233", + "step": 4560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004807208199054003, + "timestamp": "2025-10-01 03:26:47.632566", + "step": 4561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:47.664458", + "step": 4561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016961789515335113, + "timestamp": "2025-10-01 03:26:47.666482", + "step": 4562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:47.698667", + "step": 4562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003411227953620255, + "timestamp": "2025-10-01 03:26:47.701250", + "step": 4563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:47.731770", + "step": 4563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016805806080810726, + "timestamp": "2025-10-01 03:26:47.755658", + "step": 4564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:47.786456", + "step": 4564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032935980707407, + "timestamp": "2025-10-01 03:26:47.788645", + "step": 4565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:47.819558", + "step": 4565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004861738998442888, + "timestamp": "2025-10-01 03:26:47.821810", + "step": 4566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:47.851781", + "step": 4566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006216181209310889, + "timestamp": "2025-10-01 03:26:47.853977", + "step": 4567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:47.892822", + "step": 4567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012308242730796337, + "timestamp": "2025-10-01 03:26:47.916798", + "step": 4568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:47.947070", + "step": 4568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016434386372566223, + "timestamp": "2025-10-01 03:26:47.949379", + "step": 4569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:47.982494", + "step": 4569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006021831068210304, + "timestamp": "2025-10-01 03:26:47.984908", + "step": 4570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.015875", + "step": 4570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038155794609338045, + "timestamp": "2025-10-01 03:26:48.018074", + "step": 4571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:48.048713", + "step": 4571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001239198842085898, + "timestamp": "2025-10-01 03:26:48.072388", + "step": 4572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.102448", + "step": 4572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018478620040696114, + "timestamp": "2025-10-01 03:26:48.104626", + "step": 4573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.135636", + "step": 4573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00023070175666362047, + "timestamp": "2025-10-01 03:26:48.137718", + "step": 4574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.169750", + "step": 4574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005800570361316204, + "timestamp": "2025-10-01 03:26:48.171795", + "step": 4575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.201449", + "step": 4575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004280527587980032, + "timestamp": "2025-10-01 03:26:48.225064", + "step": 4576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.256128", + "step": 4576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045400112867355347, + "timestamp": "2025-10-01 03:26:48.258417", + "step": 4577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.289238", + "step": 4577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029816606547683477, + "timestamp": "2025-10-01 03:26:48.291459", + "step": 4578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:48.321719", + "step": 4578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024813353084027767, + "timestamp": "2025-10-01 03:26:48.324541", + "step": 4579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.355202", + "step": 4579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004240119887981564, + "timestamp": "2025-10-01 03:26:48.379017", + "step": 4580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.409837", + "step": 4580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003316403308417648, + "timestamp": "2025-10-01 03:26:48.412315", + "step": 4581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.442578", + "step": 4581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010621055262163281, + "timestamp": "2025-10-01 03:26:48.444717", + "step": 4582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.475276", + "step": 4582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030129658989608288, + "timestamp": "2025-10-01 03:26:48.477648", + "step": 4583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.508469", + "step": 4583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017366712912917137, + "timestamp": "2025-10-01 03:26:48.532379", + "step": 4584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.562831", + "step": 4584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020955262705683708, + "timestamp": "2025-10-01 03:26:48.565300", + "step": 4585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.605977", + "step": 4585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0042304969392716885, + "timestamp": "2025-10-01 03:26:48.608315", + "step": 4586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:48.638795", + "step": 4586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011418581940233707, + "timestamp": "2025-10-01 03:26:48.641100", + "step": 4587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.671262", + "step": 4587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014628268545493484, + "timestamp": "2025-10-01 03:26:48.694953", + "step": 4588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.725931", + "step": 4588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012695102486759424, + "timestamp": "2025-10-01 03:26:48.728311", + "step": 4589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.759258", + "step": 4589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005569060333073139, + "timestamp": "2025-10-01 03:26:48.761363", + "step": 4590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.791538", + "step": 4590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005848808796145022, + "timestamp": "2025-10-01 03:26:48.793688", + "step": 4591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.824410", + "step": 4591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03146940842270851, + "timestamp": "2025-10-01 03:26:48.847994", + "step": 4592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.877747", + "step": 4592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001149914925917983, + "timestamp": "2025-10-01 03:26:48.880881", + "step": 4593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.911217", + "step": 4593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004322185646742582, + "timestamp": "2025-10-01 03:26:48.913598", + "step": 4594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.945555", + "step": 4594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005164697766304016, + "timestamp": "2025-10-01 03:26:48.947857", + "step": 4595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:48.978110", + "step": 4595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00039524593739770353, + "timestamp": "2025-10-01 03:26:49.001804", + "step": 4596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.031978", + "step": 4596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045587978092953563, + "timestamp": "2025-10-01 03:26:49.034038", + "step": 4597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.064016", + "step": 4597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000494872045237571, + "timestamp": "2025-10-01 03:26:49.066312", + "step": 4598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.097733", + "step": 4598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010320190340280533, + "timestamp": "2025-10-01 03:26:49.100012", + "step": 4599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.129985", + "step": 4599, + "epoch": 2 + }, + { + "type": "loss", + "content": 7.12078035576269e-05, + "timestamp": "2025-10-01 03:26:49.153855", + "step": 4600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.185127", + "step": 4600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009948541410267353, + "timestamp": "2025-10-01 03:26:49.187229", + "step": 4601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.217607", + "step": 4601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012563330819830298, + "timestamp": "2025-10-01 03:26:49.219646", + "step": 4602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.250858", + "step": 4602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010612019337713718, + "timestamp": "2025-10-01 03:26:49.253258", + "step": 4603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.283815", + "step": 4603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014492636546492577, + "timestamp": "2025-10-01 03:26:49.307408", + "step": 4604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.337885", + "step": 4604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007529438007622957, + "timestamp": "2025-10-01 03:26:49.340198", + "step": 4605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.371845", + "step": 4605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005593550740741193, + "timestamp": "2025-10-01 03:26:49.377274", + "step": 4606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.413110", + "step": 4606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002284688816871494, + "timestamp": "2025-10-01 03:26:49.415378", + "step": 4607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.446776", + "step": 4607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0056832535192370415, + "timestamp": "2025-10-01 03:26:49.470601", + "step": 4608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.500969", + "step": 4608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006613036966882646, + "timestamp": "2025-10-01 03:26:49.503349", + "step": 4609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:49.534238", + "step": 4609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012416682438924909, + "timestamp": "2025-10-01 03:26:49.536465", + "step": 4610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.568468", + "step": 4610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027787729632109404, + "timestamp": "2025-10-01 03:26:49.570643", + "step": 4611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.600915", + "step": 4611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020908766600769013, + "timestamp": "2025-10-01 03:26:49.624806", + "step": 4612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:49.655256", + "step": 4612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012274959590286016, + "timestamp": "2025-10-01 03:26:49.657615", + "step": 4613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.688091", + "step": 4613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0044857715256512165, + "timestamp": "2025-10-01 03:26:49.690298", + "step": 4614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.720714", + "step": 4614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009489265503361821, + "timestamp": "2025-10-01 03:26:49.723019", + "step": 4615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.753705", + "step": 4615, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001230392517754808, + "timestamp": "2025-10-01 03:26:49.777143", + "step": 4616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.807787", + "step": 4616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002704027574509382, + "timestamp": "2025-10-01 03:26:49.810194", + "step": 4617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.840265", + "step": 4617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.10629473626613617, + "timestamp": "2025-10-01 03:26:49.846119", + "step": 4618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:49.880596", + "step": 4618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015917373821139336, + "timestamp": "2025-10-01 03:26:49.886774", + "step": 4619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.929909", + "step": 4619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003421745204832405, + "timestamp": "2025-10-01 03:26:49.954193", + "step": 4620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:49.985828", + "step": 4620, + "epoch": 2 + }, + { + "type": "loss", + "content": 7.727140473434702e-05, + "timestamp": "2025-10-01 03:26:49.988009", + "step": 4621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.018263", + "step": 4621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012072051176801324, + "timestamp": "2025-10-01 03:26:50.020497", + "step": 4622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:50.052124", + "step": 4622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005983858718536794, + "timestamp": "2025-10-01 03:26:50.054266", + "step": 4623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.084767", + "step": 4623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005061435513198376, + "timestamp": "2025-10-01 03:26:50.108507", + "step": 4624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.138849", + "step": 4624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004932482843287289, + "timestamp": "2025-10-01 03:26:50.142313", + "step": 4625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.175074", + "step": 4625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005341615178622305, + "timestamp": "2025-10-01 03:26:50.177374", + "step": 4626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.208742", + "step": 4626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00029736137366853654, + "timestamp": "2025-10-01 03:26:50.211253", + "step": 4627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.241343", + "step": 4627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008218669681809843, + "timestamp": "2025-10-01 03:26:50.265421", + "step": 4628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.296064", + "step": 4628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012862840958405286, + "timestamp": "2025-10-01 03:26:50.298390", + "step": 4629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.328221", + "step": 4629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03195691481232643, + "timestamp": "2025-10-01 03:26:50.331471", + "step": 4630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.361324", + "step": 4630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003992435522377491, + "timestamp": "2025-10-01 03:26:50.363441", + "step": 4631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:50.394869", + "step": 4631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01120931189507246, + "timestamp": "2025-10-01 03:26:50.418624", + "step": 4632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:50.449360", + "step": 4632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002370605943724513, + "timestamp": "2025-10-01 03:26:50.451574", + "step": 4633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:50.482563", + "step": 4633, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.778595732408576e-05, + "timestamp": "2025-10-01 03:26:50.484793", + "step": 4634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.515322", + "step": 4634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005490258918143809, + "timestamp": "2025-10-01 03:26:50.517426", + "step": 4635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.549323", + "step": 4635, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.934836998581886e-05, + "timestamp": "2025-10-01 03:26:50.573347", + "step": 4636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.604702", + "step": 4636, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007809824892319739, + "timestamp": "2025-10-01 03:26:50.606995", + "step": 4637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.637301", + "step": 4637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009299732628278434, + "timestamp": "2025-10-01 03:26:50.639765", + "step": 4638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.671163", + "step": 4638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011446615681052208, + "timestamp": "2025-10-01 03:26:50.673453", + "step": 4639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:50.703352", + "step": 4639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01316857896745205, + "timestamp": "2025-10-01 03:26:50.727161", + "step": 4640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.757852", + "step": 4640, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001848112209700048, + "timestamp": "2025-10-01 03:26:50.760245", + "step": 4641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.790589", + "step": 4641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04524293541908264, + "timestamp": "2025-10-01 03:26:50.792909", + "step": 4642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.823163", + "step": 4642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029643469024449587, + "timestamp": "2025-10-01 03:26:50.825536", + "step": 4643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:50.856030", + "step": 4643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017071088077500463, + "timestamp": "2025-10-01 03:26:50.879859", + "step": 4644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:50.910610", + "step": 4644, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006902364082634449, + "timestamp": "2025-10-01 03:26:50.913138", + "step": 4645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.950835", + "step": 4645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04152069240808487, + "timestamp": "2025-10-01 03:26:50.953056", + "step": 4646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:50.985096", + "step": 4646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025150451809167862, + "timestamp": "2025-10-01 03:26:50.987395", + "step": 4647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.017160", + "step": 4647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008984723826870322, + "timestamp": "2025-10-01 03:26:51.040842", + "step": 4648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.072864", + "step": 4648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0449872724711895, + "timestamp": "2025-10-01 03:26:51.074989", + "step": 4649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.105531", + "step": 4649, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018707705894485116, + "timestamp": "2025-10-01 03:26:51.107660", + "step": 4650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.137780", + "step": 4650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010371615644544363, + "timestamp": "2025-10-01 03:26:51.139953", + "step": 4651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.170432", + "step": 4651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011388265993446112, + "timestamp": "2025-10-01 03:26:51.194116", + "step": 4652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:51.224562", + "step": 4652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00041975005296990275, + "timestamp": "2025-10-01 03:26:51.227111", + "step": 4653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.257250", + "step": 4653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00023312617850024253, + "timestamp": "2025-10-01 03:26:51.259457", + "step": 4654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:51.290424", + "step": 4654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003010467393323779, + "timestamp": "2025-10-01 03:26:51.292837", + "step": 4655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:51.324546", + "step": 4655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012703417451120913, + "timestamp": "2025-10-01 03:26:51.348163", + "step": 4656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.380888", + "step": 4656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036788242869079113, + "timestamp": "2025-10-01 03:26:51.388339", + "step": 4657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.419949", + "step": 4657, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00011118348629679531, + "timestamp": "2025-10-01 03:26:51.424243", + "step": 4658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.457485", + "step": 4658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004309350624680519, + "timestamp": "2025-10-01 03:26:51.459869", + "step": 4659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.492084", + "step": 4659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009180715424008667, + "timestamp": "2025-10-01 03:26:51.515762", + "step": 4660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.548500", + "step": 4660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005374032771214843, + "timestamp": "2025-10-01 03:26:51.550827", + "step": 4661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.581019", + "step": 4661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014428733848035336, + "timestamp": "2025-10-01 03:26:51.583184", + "step": 4662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.614021", + "step": 4662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014931418932974339, + "timestamp": "2025-10-01 03:26:51.616369", + "step": 4663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.646518", + "step": 4663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06174100562930107, + "timestamp": "2025-10-01 03:26:51.670638", + "step": 4664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.701852", + "step": 4664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00675552012398839, + "timestamp": "2025-10-01 03:26:51.703945", + "step": 4665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.734378", + "step": 4665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005031402106396854, + "timestamp": "2025-10-01 03:26:51.737041", + "step": 4666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.766831", + "step": 4666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003661158261820674, + "timestamp": "2025-10-01 03:26:51.769126", + "step": 4667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.799561", + "step": 4667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00023693733965046704, + "timestamp": "2025-10-01 03:26:51.823229", + "step": 4668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.855323", + "step": 4668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04730822890996933, + "timestamp": "2025-10-01 03:26:51.857384", + "step": 4669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.887841", + "step": 4669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03976264223456383, + "timestamp": "2025-10-01 03:26:51.890073", + "step": 4670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.920995", + "step": 4670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007602334371767938, + "timestamp": "2025-10-01 03:26:51.923605", + "step": 4671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:51.957285", + "step": 4671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027815166860818863, + "timestamp": "2025-10-01 03:26:51.981054", + "step": 4672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.011940", + "step": 4672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010826170444488525, + "timestamp": "2025-10-01 03:26:52.014335", + "step": 4673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.044282", + "step": 4673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011568829417228699, + "timestamp": "2025-10-01 03:26:52.046570", + "step": 4674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.076575", + "step": 4674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001755012315697968, + "timestamp": "2025-10-01 03:26:52.078795", + "step": 4675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.109293", + "step": 4675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00038998047239147127, + "timestamp": "2025-10-01 03:26:52.133226", + "step": 4676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.167953", + "step": 4676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01058237161487341, + "timestamp": "2025-10-01 03:26:52.170092", + "step": 4677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.201402", + "step": 4677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008457060903310776, + "timestamp": "2025-10-01 03:26:52.203690", + "step": 4678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.233861", + "step": 4678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001321314019151032, + "timestamp": "2025-10-01 03:26:52.235970", + "step": 4679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.266329", + "step": 4679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021401233971118927, + "timestamp": "2025-10-01 03:26:52.289975", + "step": 4680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.320177", + "step": 4680, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006755082868039608, + "timestamp": "2025-10-01 03:26:52.322355", + "step": 4681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.352337", + "step": 4681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013181083835661411, + "timestamp": "2025-10-01 03:26:52.354979", + "step": 4682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.389826", + "step": 4682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05415748432278633, + "timestamp": "2025-10-01 03:26:52.392045", + "step": 4683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.421833", + "step": 4683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004307625349611044, + "timestamp": "2025-10-01 03:26:52.445590", + "step": 4684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.477033", + "step": 4684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019888137467205524, + "timestamp": "2025-10-01 03:26:52.479635", + "step": 4685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:52.513270", + "step": 4685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022979045752435923, + "timestamp": "2025-10-01 03:26:52.515616", + "step": 4686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.546687", + "step": 4686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004576229956001043, + "timestamp": "2025-10-01 03:26:52.548930", + "step": 4687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:52.579842", + "step": 4687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010192198678851128, + "timestamp": "2025-10-01 03:26:52.603527", + "step": 4688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.637142", + "step": 4688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06170213222503662, + "timestamp": "2025-10-01 03:26:52.639428", + "step": 4689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.669874", + "step": 4689, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034673143178224564, + "timestamp": "2025-10-01 03:26:52.672313", + "step": 4690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.702078", + "step": 4690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018265580292791128, + "timestamp": "2025-10-01 03:26:52.705634", + "step": 4691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:52.736122", + "step": 4691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028626281768083572, + "timestamp": "2025-10-01 03:26:52.759962", + "step": 4692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:52.790677", + "step": 4692, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017858216306194663, + "timestamp": "2025-10-01 03:26:52.792861", + "step": 4693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.822775", + "step": 4693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025632504373788834, + "timestamp": "2025-10-01 03:26:52.825039", + "step": 4694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:52.855418", + "step": 4694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004526097793132067, + "timestamp": "2025-10-01 03:26:52.857510", + "step": 4695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:52.887936", + "step": 4695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003525275271385908, + "timestamp": "2025-10-01 03:26:52.911717", + "step": 4696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.942207", + "step": 4696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009305506944656372, + "timestamp": "2025-10-01 03:26:52.946923", + "step": 4697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:52.976937", + "step": 4697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004633672069758177, + "timestamp": "2025-10-01 03:26:52.979210", + "step": 4698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.010286", + "step": 4698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019695354625582695, + "timestamp": "2025-10-01 03:26:53.012984", + "step": 4699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:53.043666", + "step": 4699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014088618569076061, + "timestamp": "2025-10-01 03:26:53.067823", + "step": 4700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:53.098670", + "step": 4700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004279715474694967, + "timestamp": "2025-10-01 03:26:53.101087", + "step": 4701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.131835", + "step": 4701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.036763548851013184, + "timestamp": "2025-10-01 03:26:53.134134", + "step": 4702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:53.167363", + "step": 4702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03050510212779045, + "timestamp": "2025-10-01 03:26:53.169467", + "step": 4703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.200516", + "step": 4703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004666267894208431, + "timestamp": "2025-10-01 03:26:53.224392", + "step": 4704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.254283", + "step": 4704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015432065352797508, + "timestamp": "2025-10-01 03:26:53.257296", + "step": 4705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.289191", + "step": 4705, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04070205241441727, + "timestamp": "2025-10-01 03:26:53.291956", + "step": 4706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.325132", + "step": 4706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026774213183671236, + "timestamp": "2025-10-01 03:26:53.328459", + "step": 4707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.360555", + "step": 4707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017210181802511215, + "timestamp": "2025-10-01 03:26:53.384650", + "step": 4708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.419074", + "step": 4708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008705638349056244, + "timestamp": "2025-10-01 03:26:53.421900", + "step": 4709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:53.454335", + "step": 4709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015270374715328217, + "timestamp": "2025-10-01 03:26:53.457010", + "step": 4710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:53.488097", + "step": 4710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01600327156484127, + "timestamp": "2025-10-01 03:26:53.491646", + "step": 4711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:53.524139", + "step": 4711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001695294864475727, + "timestamp": "2025-10-01 03:26:53.549689", + "step": 4712, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:26:55.797795", + "step": 4712, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2195824.773205557, + "timestamp": "2025-10-01 03:26:55.800383", + "step": 4712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:55.829271", + "step": 4712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006987645756453276, + "timestamp": "2025-10-01 03:26:55.831375", + "step": 4713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:55.862476", + "step": 4713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027021756395697594, + "timestamp": "2025-10-01 03:26:55.864947", + "step": 4714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:55.896189", + "step": 4714, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027401826810091734, + "timestamp": "2025-10-01 03:26:55.898313", + "step": 4715, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:55.937624", + "step": 4715, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001307074329815805, + "timestamp": "2025-10-01 03:26:55.961508", + "step": 4716, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:55.992232", + "step": 4716, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008763567544519901, + "timestamp": "2025-10-01 03:26:55.995404", + "step": 4717, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.027780", + "step": 4717, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015282622538506985, + "timestamp": "2025-10-01 03:26:56.030204", + "step": 4718, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.060350", + "step": 4718, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009659395436756313, + "timestamp": "2025-10-01 03:26:56.062677", + "step": 4719, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.093271", + "step": 4719, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037141598761081696, + "timestamp": "2025-10-01 03:26:56.117071", + "step": 4720, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.160033", + "step": 4720, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002382307779043913, + "timestamp": "2025-10-01 03:26:56.163904", + "step": 4721, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.194395", + "step": 4721, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012789247557520866, + "timestamp": "2025-10-01 03:26:56.196498", + "step": 4722, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.240067", + "step": 4722, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01258443109691143, + "timestamp": "2025-10-01 03:26:56.242339", + "step": 4723, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.273463", + "step": 4723, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002607449423521757, + "timestamp": "2025-10-01 03:26:56.297436", + "step": 4724, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:56.328544", + "step": 4724, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010705774649977684, + "timestamp": "2025-10-01 03:26:56.330830", + "step": 4725, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:56.361688", + "step": 4725, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05286660045385361, + "timestamp": "2025-10-01 03:26:56.363913", + "step": 4726, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.396549", + "step": 4726, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0075346059165894985, + "timestamp": "2025-10-01 03:26:56.398650", + "step": 4727, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:56.431644", + "step": 4727, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002162033459171653, + "timestamp": "2025-10-01 03:26:56.455403", + "step": 4728, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.487158", + "step": 4728, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008827258832752705, + "timestamp": "2025-10-01 03:26:56.489475", + "step": 4729, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.520352", + "step": 4729, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016242889687418938, + "timestamp": "2025-10-01 03:26:56.522554", + "step": 4730, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.553119", + "step": 4730, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.038186218589544296, + "timestamp": "2025-10-01 03:26:56.555617", + "step": 4731, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.587065", + "step": 4731, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016193155199289322, + "timestamp": "2025-10-01 03:26:56.610964", + "step": 4732, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.642543", + "step": 4732, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002832641126587987, + "timestamp": "2025-10-01 03:26:56.644853", + "step": 4733, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.675426", + "step": 4733, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016282129799947143, + "timestamp": "2025-10-01 03:26:56.677702", + "step": 4734, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:56.708297", + "step": 4734, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012366866692900658, + "timestamp": "2025-10-01 03:26:56.710502", + "step": 4735, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.741389", + "step": 4735, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006706431973725557, + "timestamp": "2025-10-01 03:26:56.765183", + "step": 4736, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.795735", + "step": 4736, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0071798586286604404, + "timestamp": "2025-10-01 03:26:56.798061", + "step": 4737, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.828213", + "step": 4737, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011525701265782118, + "timestamp": "2025-10-01 03:26:56.830398", + "step": 4738, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.862679", + "step": 4738, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027487855404615402, + "timestamp": "2025-10-01 03:26:56.865146", + "step": 4739, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.895279", + "step": 4739, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017726385965943336, + "timestamp": "2025-10-01 03:26:56.919182", + "step": 4740, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.949437", + "step": 4740, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041015177965164185, + "timestamp": "2025-10-01 03:26:56.954191", + "step": 4741, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:56.985476", + "step": 4741, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019089862471446395, + "timestamp": "2025-10-01 03:26:56.987753", + "step": 4742, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.017639", + "step": 4742, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00329802418127656, + "timestamp": "2025-10-01 03:26:57.019826", + "step": 4743, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.050545", + "step": 4743, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012458967044949532, + "timestamp": "2025-10-01 03:26:57.074122", + "step": 4744, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.104885", + "step": 4744, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019440142204985023, + "timestamp": "2025-10-01 03:26:57.107280", + "step": 4745, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.138203", + "step": 4745, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029035134240984917, + "timestamp": "2025-10-01 03:26:57.140665", + "step": 4746, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.171451", + "step": 4746, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005945912562310696, + "timestamp": "2025-10-01 03:26:57.173850", + "step": 4747, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.204063", + "step": 4747, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0048279729671776295, + "timestamp": "2025-10-01 03:26:57.228221", + "step": 4748, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:57.259521", + "step": 4748, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.038917653262615204, + "timestamp": "2025-10-01 03:26:57.262074", + "step": 4749, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.293313", + "step": 4749, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030699951574206352, + "timestamp": "2025-10-01 03:26:57.295374", + "step": 4750, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.326563", + "step": 4750, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009374948567710817, + "timestamp": "2025-10-01 03:26:57.329498", + "step": 4751, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:57.359867", + "step": 4751, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024961939081549644, + "timestamp": "2025-10-01 03:26:57.384751", + "step": 4752, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.416003", + "step": 4752, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01236541848629713, + "timestamp": "2025-10-01 03:26:57.418368", + "step": 4753, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.449835", + "step": 4753, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002708888496272266, + "timestamp": "2025-10-01 03:26:57.453016", + "step": 4754, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:57.483609", + "step": 4754, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008670494891703129, + "timestamp": "2025-10-01 03:26:57.485845", + "step": 4755, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.515925", + "step": 4755, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016259559197351336, + "timestamp": "2025-10-01 03:26:57.539948", + "step": 4756, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.571696", + "step": 4756, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014168728375807405, + "timestamp": "2025-10-01 03:26:57.573568", + "step": 4757, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.607268", + "step": 4757, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001718592015095055, + "timestamp": "2025-10-01 03:26:57.609657", + "step": 4758, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.641930", + "step": 4758, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010168826207518578, + "timestamp": "2025-10-01 03:26:57.644543", + "step": 4759, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.683715", + "step": 4759, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009398083202540874, + "timestamp": "2025-10-01 03:26:57.707445", + "step": 4760, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.737972", + "step": 4760, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006041620508767664, + "timestamp": "2025-10-01 03:26:57.740637", + "step": 4761, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.771844", + "step": 4761, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026466628536581993, + "timestamp": "2025-10-01 03:26:57.774638", + "step": 4762, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.804845", + "step": 4762, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004792117979377508, + "timestamp": "2025-10-01 03:26:57.806948", + "step": 4763, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.836914", + "step": 4763, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002319177845492959, + "timestamp": "2025-10-01 03:26:57.860804", + "step": 4764, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.891363", + "step": 4764, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015463853487744927, + "timestamp": "2025-10-01 03:26:57.893706", + "step": 4765, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.924130", + "step": 4765, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01707622781395912, + "timestamp": "2025-10-01 03:26:57.926368", + "step": 4766, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:57.956916", + "step": 4766, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021501686424016953, + "timestamp": "2025-10-01 03:26:57.959884", + "step": 4767, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:57.990234", + "step": 4767, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014343759045004845, + "timestamp": "2025-10-01 03:26:58.014036", + "step": 4768, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.044860", + "step": 4768, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001682551926933229, + "timestamp": "2025-10-01 03:26:58.047140", + "step": 4769, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.079171", + "step": 4769, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003696667728945613, + "timestamp": "2025-10-01 03:26:58.081562", + "step": 4770, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:58.111670", + "step": 4770, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003086792305111885, + "timestamp": "2025-10-01 03:26:58.113860", + "step": 4771, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.144220", + "step": 4771, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03958642855286598, + "timestamp": "2025-10-01 03:26:58.167908", + "step": 4772, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.199316", + "step": 4772, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024640243500471115, + "timestamp": "2025-10-01 03:26:58.201404", + "step": 4773, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.231507", + "step": 4773, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010202718898653984, + "timestamp": "2025-10-01 03:26:58.233821", + "step": 4774, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.267879", + "step": 4774, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005753586301580071, + "timestamp": "2025-10-01 03:26:58.270538", + "step": 4775, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.302774", + "step": 4775, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008948713541030884, + "timestamp": "2025-10-01 03:26:58.326496", + "step": 4776, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.356702", + "step": 4776, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020880237221717834, + "timestamp": "2025-10-01 03:26:58.359640", + "step": 4777, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.390102", + "step": 4777, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0941094234585762, + "timestamp": "2025-10-01 03:26:58.392309", + "step": 4778, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.423537", + "step": 4778, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02093089558184147, + "timestamp": "2025-10-01 03:26:58.425744", + "step": 4779, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:58.457110", + "step": 4779, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007420045789331198, + "timestamp": "2025-10-01 03:26:58.480877", + "step": 4780, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.511407", + "step": 4780, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01174645870923996, + "timestamp": "2025-10-01 03:26:58.513750", + "step": 4781, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:58.545472", + "step": 4781, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02464292384684086, + "timestamp": "2025-10-01 03:26:58.547751", + "step": 4782, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:58.578371", + "step": 4782, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012898587621748447, + "timestamp": "2025-10-01 03:26:58.581021", + "step": 4783, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.613357", + "step": 4783, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037235088646411896, + "timestamp": "2025-10-01 03:26:58.636909", + "step": 4784, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:58.667808", + "step": 4784, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00985915120691061, + "timestamp": "2025-10-01 03:26:58.670029", + "step": 4785, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.701136", + "step": 4785, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026099497452378273, + "timestamp": "2025-10-01 03:26:58.703256", + "step": 4786, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.733902", + "step": 4786, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012067530769854784, + "timestamp": "2025-10-01 03:26:58.736122", + "step": 4787, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.766688", + "step": 4787, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008482174016535282, + "timestamp": "2025-10-01 03:26:58.790424", + "step": 4788, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.821161", + "step": 4788, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00544446986168623, + "timestamp": "2025-10-01 03:26:58.823313", + "step": 4789, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:26:58.854964", + "step": 4789, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04354796186089516, + "timestamp": "2025-10-01 03:26:58.857775", + "step": 4790, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.890528", + "step": 4790, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003480639134068042, + "timestamp": "2025-10-01 03:26:58.892962", + "step": 4791, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:58.924245", + "step": 4791, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008663206361234188, + "timestamp": "2025-10-01 03:26:58.948308", + "step": 4792, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:58.980580", + "step": 4792, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03502395376563072, + "timestamp": "2025-10-01 03:26:58.983638", + "step": 4793, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:59.015673", + "step": 4793, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008886295254342258, + "timestamp": "2025-10-01 03:26:59.018122", + "step": 4794, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.049135", + "step": 4794, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013147711753845215, + "timestamp": "2025-10-01 03:26:59.051249", + "step": 4795, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.082033", + "step": 4795, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018643023446202278, + "timestamp": "2025-10-01 03:26:59.106930", + "step": 4796, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.138104", + "step": 4796, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016699235886335373, + "timestamp": "2025-10-01 03:26:59.140547", + "step": 4797, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.170949", + "step": 4797, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00907555315643549, + "timestamp": "2025-10-01 03:26:59.173642", + "step": 4798, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.203797", + "step": 4798, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05001586675643921, + "timestamp": "2025-10-01 03:26:59.206006", + "step": 4799, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.243602", + "step": 4799, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037219617515802383, + "timestamp": "2025-10-01 03:26:59.267450", + "step": 4800, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.297658", + "step": 4800, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01041671447455883, + "timestamp": "2025-10-01 03:26:59.300079", + "step": 4801, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.330914", + "step": 4801, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002659778343513608, + "timestamp": "2025-10-01 03:26:59.333116", + "step": 4802, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.363705", + "step": 4802, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013810945674777031, + "timestamp": "2025-10-01 03:26:59.365830", + "step": 4803, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.395812", + "step": 4803, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013162444345653057, + "timestamp": "2025-10-01 03:26:59.419439", + "step": 4804, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.449905", + "step": 4804, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007907168008387089, + "timestamp": "2025-10-01 03:26:59.452457", + "step": 4805, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.482895", + "step": 4805, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006179798394441605, + "timestamp": "2025-10-01 03:26:59.485148", + "step": 4806, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.516385", + "step": 4806, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035905856639146805, + "timestamp": "2025-10-01 03:26:59.518879", + "step": 4807, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.549315", + "step": 4807, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.164827497443184e-05, + "timestamp": "2025-10-01 03:26:59.573360", + "step": 4808, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.604238", + "step": 4808, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021744067780673504, + "timestamp": "2025-10-01 03:26:59.606566", + "step": 4809, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:26:59.647609", + "step": 4809, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02663515694439411, + "timestamp": "2025-10-01 03:26:59.652643", + "step": 4810, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.691340", + "step": 4810, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010115385986864567, + "timestamp": "2025-10-01 03:26:59.693753", + "step": 4811, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.724589", + "step": 4811, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009528905502520502, + "timestamp": "2025-10-01 03:26:59.748587", + "step": 4812, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:26:59.779173", + "step": 4812, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005212283693253994, + "timestamp": "2025-10-01 03:26:59.782153", + "step": 4813, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.815739", + "step": 4813, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00177581247407943, + "timestamp": "2025-10-01 03:26:59.818039", + "step": 4814, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.848831", + "step": 4814, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013545684516429901, + "timestamp": "2025-10-01 03:26:59.851039", + "step": 4815, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.881815", + "step": 4815, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015441206051036716, + "timestamp": "2025-10-01 03:26:59.906035", + "step": 4816, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.937104", + "step": 4816, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016536075621843338, + "timestamp": "2025-10-01 03:26:59.939496", + "step": 4817, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:26:59.970237", + "step": 4817, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014190796064212918, + "timestamp": "2025-10-01 03:26:59.972501", + "step": 4818, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:00.003370", + "step": 4818, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019007184309884906, + "timestamp": "2025-10-01 03:27:00.005586", + "step": 4819, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.035840", + "step": 4819, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02846238948404789, + "timestamp": "2025-10-01 03:27:00.059690", + "step": 4820, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.102281", + "step": 4820, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024008903710637242, + "timestamp": "2025-10-01 03:27:00.104434", + "step": 4821, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.135793", + "step": 4821, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015735176566522568, + "timestamp": "2025-10-01 03:27:00.138169", + "step": 4822, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:00.169878", + "step": 4822, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005509443115442991, + "timestamp": "2025-10-01 03:27:00.172101", + "step": 4823, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.203781", + "step": 4823, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005540918791666627, + "timestamp": "2025-10-01 03:27:00.227239", + "step": 4824, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.257701", + "step": 4824, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002728766296058893, + "timestamp": "2025-10-01 03:27:00.260001", + "step": 4825, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.295782", + "step": 4825, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005486640729941428, + "timestamp": "2025-10-01 03:27:00.298103", + "step": 4826, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:00.330716", + "step": 4826, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010778585448861122, + "timestamp": "2025-10-01 03:27:00.333092", + "step": 4827, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.364178", + "step": 4827, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014677646569907665, + "timestamp": "2025-10-01 03:27:00.387864", + "step": 4828, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.427628", + "step": 4828, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012755183270201087, + "timestamp": "2025-10-01 03:27:00.429913", + "step": 4829, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.460294", + "step": 4829, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00808808021247387, + "timestamp": "2025-10-01 03:27:00.462412", + "step": 4830, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.494403", + "step": 4830, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0082341767847538, + "timestamp": "2025-10-01 03:27:00.496495", + "step": 4831, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.527001", + "step": 4831, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026439677458256483, + "timestamp": "2025-10-01 03:27:00.550723", + "step": 4832, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:00.582054", + "step": 4832, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032846289686858654, + "timestamp": "2025-10-01 03:27:00.584145", + "step": 4833, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.614667", + "step": 4833, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04173916578292847, + "timestamp": "2025-10-01 03:27:00.616870", + "step": 4834, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.647075", + "step": 4834, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0077222855761647224, + "timestamp": "2025-10-01 03:27:00.649476", + "step": 4835, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:00.680095", + "step": 4835, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029047658666968346, + "timestamp": "2025-10-01 03:27:00.703796", + "step": 4836, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.734794", + "step": 4836, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009635289199650288, + "timestamp": "2025-10-01 03:27:00.737308", + "step": 4837, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.767837", + "step": 4837, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005094275111332536, + "timestamp": "2025-10-01 03:27:00.770176", + "step": 4838, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.800989", + "step": 4838, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01811937429010868, + "timestamp": "2025-10-01 03:27:00.803059", + "step": 4839, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.834931", + "step": 4839, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044978951336815953, + "timestamp": "2025-10-01 03:27:00.858867", + "step": 4840, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:00.890786", + "step": 4840, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03701077774167061, + "timestamp": "2025-10-01 03:27:00.892982", + "step": 4841, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.923587", + "step": 4841, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006409208290278912, + "timestamp": "2025-10-01 03:27:00.925787", + "step": 4842, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.957741", + "step": 4842, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006457540672272444, + "timestamp": "2025-10-01 03:27:00.961242", + "step": 4843, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:00.992297", + "step": 4843, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03347129374742508, + "timestamp": "2025-10-01 03:27:01.015988", + "step": 4844, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.046441", + "step": 4844, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007826571352779865, + "timestamp": "2025-10-01 03:27:01.048668", + "step": 4845, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.079156", + "step": 4845, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.038339000195264816, + "timestamp": "2025-10-01 03:27:01.081307", + "step": 4846, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.112831", + "step": 4846, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029854968190193176, + "timestamp": "2025-10-01 03:27:01.114989", + "step": 4847, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.145402", + "step": 4847, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001128735370002687, + "timestamp": "2025-10-01 03:27:01.169287", + "step": 4848, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.200458", + "step": 4848, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015062122838571668, + "timestamp": "2025-10-01 03:27:01.202612", + "step": 4849, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.233579", + "step": 4849, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031183497980237007, + "timestamp": "2025-10-01 03:27:01.236626", + "step": 4850, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.268376", + "step": 4850, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008522591553628445, + "timestamp": "2025-10-01 03:27:01.271222", + "step": 4851, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.302794", + "step": 4851, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00038577645318582654, + "timestamp": "2025-10-01 03:27:01.326878", + "step": 4852, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.358125", + "step": 4852, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005619273870252073, + "timestamp": "2025-10-01 03:27:01.360578", + "step": 4853, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.391204", + "step": 4853, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034154329914599657, + "timestamp": "2025-10-01 03:27:01.393631", + "step": 4854, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.424548", + "step": 4854, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024289216846227646, + "timestamp": "2025-10-01 03:27:01.426970", + "step": 4855, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.457307", + "step": 4855, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03569275885820389, + "timestamp": "2025-10-01 03:27:01.481064", + "step": 4856, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.511401", + "step": 4856, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029694675467908382, + "timestamp": "2025-10-01 03:27:01.513499", + "step": 4857, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.545281", + "step": 4857, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001165154273621738, + "timestamp": "2025-10-01 03:27:01.547878", + "step": 4858, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.578374", + "step": 4858, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.038742948323488235, + "timestamp": "2025-10-01 03:27:01.580877", + "step": 4859, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.612461", + "step": 4859, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004684145096689463, + "timestamp": "2025-10-01 03:27:01.636120", + "step": 4860, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.666820", + "step": 4860, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007877816446125507, + "timestamp": "2025-10-01 03:27:01.668957", + "step": 4861, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.700394", + "step": 4861, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003075821092352271, + "timestamp": "2025-10-01 03:27:01.703129", + "step": 4862, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.734676", + "step": 4862, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004328253213316202, + "timestamp": "2025-10-01 03:27:01.736880", + "step": 4863, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:01.767624", + "step": 4863, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023849280551075935, + "timestamp": "2025-10-01 03:27:01.797358", + "step": 4864, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:27:04.083820", + "step": 4864, + "epoch": 2 + }, + { + "type": "pplx", + "content": 2123466.5329399807, + "timestamp": "2025-10-01 03:27:04.089922", + "step": 4864, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.123469", + "step": 4864, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017236320301890373, + "timestamp": "2025-10-01 03:27:04.129190", + "step": 4865, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.163816", + "step": 4865, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025285767391324043, + "timestamp": "2025-10-01 03:27:04.171431", + "step": 4866, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.202907", + "step": 4866, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.058331675827503204, + "timestamp": "2025-10-01 03:27:04.205255", + "step": 4867, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.235786", + "step": 4867, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005549633409827948, + "timestamp": "2025-10-01 03:27:04.260303", + "step": 4868, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.291846", + "step": 4868, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004669431131333113, + "timestamp": "2025-10-01 03:27:04.294419", + "step": 4869, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.324435", + "step": 4869, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006639298517256975, + "timestamp": "2025-10-01 03:27:04.326634", + "step": 4870, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.358555", + "step": 4870, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005064818076789379, + "timestamp": "2025-10-01 03:27:04.360746", + "step": 4871, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 1, + 80 + ], + "flops": 593517404912 + }, + "timestamp": "2025-10-01 03:27:04.391389", + "step": 4871, + "epoch": 2 + }, + { + "type": "loss", + "content": 2.6180869099334814e-05, + "timestamp": "2025-10-01 03:27:04.415204", + "step": 4872, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.447692", + "step": 4872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004483114928007126, + "timestamp": "2025-10-01 03:27:04.449941", + "step": 4873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.481002", + "step": 4873, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0039407541044056416, + "timestamp": "2025-10-01 03:27:04.483659", + "step": 4874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.515409", + "step": 4874, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011083823628723621, + "timestamp": "2025-10-01 03:27:04.517405", + "step": 4875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.548305", + "step": 4875, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018484745174646378, + "timestamp": "2025-10-01 03:27:04.572695", + "step": 4876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.603434", + "step": 4876, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01997585967183113, + "timestamp": "2025-10-01 03:27:04.605887", + "step": 4877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.636074", + "step": 4877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001339600421488285, + "timestamp": "2025-10-01 03:27:04.638427", + "step": 4878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.670245", + "step": 4878, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004409200511872768, + "timestamp": "2025-10-01 03:27:04.672474", + "step": 4879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.712161", + "step": 4879, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005895250476896763, + "timestamp": "2025-10-01 03:27:04.735927", + "step": 4880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:04.766214", + "step": 4880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00809439830482006, + "timestamp": "2025-10-01 03:27:04.768640", + "step": 4881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:04.799092", + "step": 4881, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014916504733264446, + "timestamp": "2025-10-01 03:27:04.801731", + "step": 4882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.832510", + "step": 4882, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019562674686312675, + "timestamp": "2025-10-01 03:27:04.834957", + "step": 4883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.867285", + "step": 4883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010814501903951168, + "timestamp": "2025-10-01 03:27:04.893854", + "step": 4884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:04.924235", + "step": 4884, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01799078844487667, + "timestamp": "2025-10-01 03:27:04.926354", + "step": 4885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.957378", + "step": 4885, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0061707026325166225, + "timestamp": "2025-10-01 03:27:04.959741", + "step": 4886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:04.989879", + "step": 4886, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026070473715662956, + "timestamp": "2025-10-01 03:27:04.992115", + "step": 4887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.024243", + "step": 4887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006437436677515507, + "timestamp": "2025-10-01 03:27:05.048172", + "step": 4888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.087714", + "step": 4888, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001955627929419279, + "timestamp": "2025-10-01 03:27:05.089961", + "step": 4889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:05.121084", + "step": 4889, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005153622478246689, + "timestamp": "2025-10-01 03:27:05.123361", + "step": 4890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.153973", + "step": 4890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004376337863504887, + "timestamp": "2025-10-01 03:27:05.156852", + "step": 4891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.188557", + "step": 4891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011297515593469143, + "timestamp": "2025-10-01 03:27:05.212378", + "step": 4892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:05.242920", + "step": 4892, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008816507761366665, + "timestamp": "2025-10-01 03:27:05.245332", + "step": 4893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.276309", + "step": 4893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01229159813374281, + "timestamp": "2025-10-01 03:27:05.279524", + "step": 4894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.314735", + "step": 4894, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002003398258239031, + "timestamp": "2025-10-01 03:27:05.317067", + "step": 4895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.350568", + "step": 4895, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005958497058600187, + "timestamp": "2025-10-01 03:27:05.376903", + "step": 4896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.414782", + "step": 4896, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004753305111080408, + "timestamp": "2025-10-01 03:27:05.417191", + "step": 4897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.447425", + "step": 4897, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011822456726804376, + "timestamp": "2025-10-01 03:27:05.449683", + "step": 4898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:05.484664", + "step": 4898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024054476525634527, + "timestamp": "2025-10-01 03:27:05.487381", + "step": 4899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.518073", + "step": 4899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007270416710525751, + "timestamp": "2025-10-01 03:27:05.541893", + "step": 4900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.573305", + "step": 4900, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015025261382106692, + "timestamp": "2025-10-01 03:27:05.575759", + "step": 4901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.607246", + "step": 4901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030254984740167856, + "timestamp": "2025-10-01 03:27:05.611694", + "step": 4902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.641937", + "step": 4902, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010631255805492401, + "timestamp": "2025-10-01 03:27:05.644635", + "step": 4903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.677965", + "step": 4903, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012905023759230971, + "timestamp": "2025-10-01 03:27:05.701799", + "step": 4904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.733744", + "step": 4904, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009979077149182558, + "timestamp": "2025-10-01 03:27:05.735992", + "step": 4905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:05.767223", + "step": 4905, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017695442074909806, + "timestamp": "2025-10-01 03:27:05.770078", + "step": 4906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.808671", + "step": 4906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030488003976643085, + "timestamp": "2025-10-01 03:27:05.811755", + "step": 4907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.847160", + "step": 4907, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001108599899453111, + "timestamp": "2025-10-01 03:27:05.871295", + "step": 4908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.902013", + "step": 4908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019496626919135451, + "timestamp": "2025-10-01 03:27:05.907270", + "step": 4909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.944155", + "step": 4909, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005090967286378145, + "timestamp": "2025-10-01 03:27:05.946314", + "step": 4910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:05.978211", + "step": 4910, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008477013325318694, + "timestamp": "2025-10-01 03:27:05.980717", + "step": 4911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.022282", + "step": 4911, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0050918362103402615, + "timestamp": "2025-10-01 03:27:06.046241", + "step": 4912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.079118", + "step": 4912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010947876144200563, + "timestamp": "2025-10-01 03:27:06.081385", + "step": 4913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:06.120072", + "step": 4913, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048059257096610963, + "timestamp": "2025-10-01 03:27:06.122645", + "step": 4914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.153167", + "step": 4914, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002707315841689706, + "timestamp": "2025-10-01 03:27:06.155467", + "step": 4915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.186066", + "step": 4915, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01621435396373272, + "timestamp": "2025-10-01 03:27:06.209762", + "step": 4916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.240828", + "step": 4916, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001916506327688694, + "timestamp": "2025-10-01 03:27:06.243184", + "step": 4917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:06.274575", + "step": 4917, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0044997683726251125, + "timestamp": "2025-10-01 03:27:06.277097", + "step": 4918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.309508", + "step": 4918, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000548254291061312, + "timestamp": "2025-10-01 03:27:06.311780", + "step": 4919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.343540", + "step": 4919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0335177481174469, + "timestamp": "2025-10-01 03:27:06.367314", + "step": 4920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.398170", + "step": 4920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006291854660958052, + "timestamp": "2025-10-01 03:27:06.400579", + "step": 4921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:06.431274", + "step": 4921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004453394096344709, + "timestamp": "2025-10-01 03:27:06.433789", + "step": 4922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:06.464262", + "step": 4922, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003514503361657262, + "timestamp": "2025-10-01 03:27:06.466648", + "step": 4923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:27:06.497242", + "step": 4923, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007112600840628147, + "timestamp": "2025-10-01 03:27:06.521286", + "step": 4924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:06.552782", + "step": 4924, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.059510014951229095, + "timestamp": "2025-10-01 03:27:06.555335", + "step": 4925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.586837", + "step": 4925, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006281991372816265, + "timestamp": "2025-10-01 03:27:06.589497", + "step": 4926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.620412", + "step": 4926, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001470899791456759, + "timestamp": "2025-10-01 03:27:06.622671", + "step": 4927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:06.653422", + "step": 4927, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00168563739862293, + "timestamp": "2025-10-01 03:27:06.677668", + "step": 4928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.710042", + "step": 4928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031188869616016746, + "timestamp": "2025-10-01 03:27:06.712458", + "step": 4929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.743345", + "step": 4929, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003278688236605376, + "timestamp": "2025-10-01 03:27:06.745642", + "step": 4930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:06.776744", + "step": 4930, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001121617533499375, + "timestamp": "2025-10-01 03:27:06.781179", + "step": 4931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.824791", + "step": 4931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01252612005919218, + "timestamp": "2025-10-01 03:27:06.848696", + "step": 4932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.884355", + "step": 4932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027505139587447047, + "timestamp": "2025-10-01 03:27:06.886758", + "step": 4933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:06.917779", + "step": 4933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01212241780012846, + "timestamp": "2025-10-01 03:27:06.920317", + "step": 4934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.951513", + "step": 4934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012211512075737119, + "timestamp": "2025-10-01 03:27:06.954215", + "step": 4935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:06.990210", + "step": 4935, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01491442508995533, + "timestamp": "2025-10-01 03:27:07.020070", + "step": 4936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.056458", + "step": 4936, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011118674650788307, + "timestamp": "2025-10-01 03:27:07.058888", + "step": 4937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.089952", + "step": 4937, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012616602471098304, + "timestamp": "2025-10-01 03:27:07.097304", + "step": 4938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.131676", + "step": 4938, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003818396246060729, + "timestamp": "2025-10-01 03:27:07.136553", + "step": 4939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.168146", + "step": 4939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008283453062176704, + "timestamp": "2025-10-01 03:27:07.192847", + "step": 4940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:07.222953", + "step": 4940, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04830241948366165, + "timestamp": "2025-10-01 03:27:07.225280", + "step": 4941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.257017", + "step": 4941, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000230679870583117, + "timestamp": "2025-10-01 03:27:07.262498", + "step": 4942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.297363", + "step": 4942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007708417833782732, + "timestamp": "2025-10-01 03:27:07.299866", + "step": 4943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:07.332945", + "step": 4943, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01304767094552517, + "timestamp": "2025-10-01 03:27:07.356642", + "step": 4944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.391932", + "step": 4944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018735000863671303, + "timestamp": "2025-10-01 03:27:07.394196", + "step": 4945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.435471", + "step": 4945, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00450141029432416, + "timestamp": "2025-10-01 03:27:07.437859", + "step": 4946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:07.475531", + "step": 4946, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05449851602315903, + "timestamp": "2025-10-01 03:27:07.478017", + "step": 4947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.508074", + "step": 4947, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003222418890800327, + "timestamp": "2025-10-01 03:27:07.532018", + "step": 4948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.563007", + "step": 4948, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003308764426037669, + "timestamp": "2025-10-01 03:27:07.565634", + "step": 4949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.596421", + "step": 4949, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03533325716853142, + "timestamp": "2025-10-01 03:27:07.599186", + "step": 4950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:07.629777", + "step": 4950, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014933956786990166, + "timestamp": "2025-10-01 03:27:07.632255", + "step": 4951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.679568", + "step": 4951, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.455451072426513e-05, + "timestamp": "2025-10-01 03:27:07.704635", + "step": 4952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:27:07.744154", + "step": 4952, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018252633162774146, + "timestamp": "2025-10-01 03:27:07.746910", + "step": 4953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.777460", + "step": 4953, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.043054524809122086, + "timestamp": "2025-10-01 03:27:07.779929", + "step": 4954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.812532", + "step": 4954, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020583488512784243, + "timestamp": "2025-10-01 03:27:07.814963", + "step": 4955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.845742", + "step": 4955, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009268363937735558, + "timestamp": "2025-10-01 03:27:07.869886", + "step": 4956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.902366", + "step": 4956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003240626771003008, + "timestamp": "2025-10-01 03:27:07.904755", + "step": 4957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.934811", + "step": 4957, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007283573038876057, + "timestamp": "2025-10-01 03:27:07.937506", + "step": 4958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:07.972031", + "step": 4958, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028929015388712287, + "timestamp": "2025-10-01 03:27:07.974417", + "step": 4959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.007021", + "step": 4959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008767792023718357, + "timestamp": "2025-10-01 03:27:08.030826", + "step": 4960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.062147", + "step": 4960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003475697594694793, + "timestamp": "2025-10-01 03:27:08.064907", + "step": 4961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:08.096417", + "step": 4961, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009159293258562684, + "timestamp": "2025-10-01 03:27:08.099865", + "step": 4962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:08.136432", + "step": 4962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012452119262889028, + "timestamp": "2025-10-01 03:27:08.138893", + "step": 4963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.169588", + "step": 4963, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.031597550958395004, + "timestamp": "2025-10-01 03:27:08.193790", + "step": 4964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.224510", + "step": 4964, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007390592712908983, + "timestamp": "2025-10-01 03:27:08.226836", + "step": 4965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.257960", + "step": 4965, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01684156246483326, + "timestamp": "2025-10-01 03:27:08.260373", + "step": 4966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.291259", + "step": 4966, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06775902211666107, + "timestamp": "2025-10-01 03:27:08.293605", + "step": 4967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.324174", + "step": 4967, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004665348387788981, + "timestamp": "2025-10-01 03:27:08.348214", + "step": 4968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:08.379815", + "step": 4968, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003334178763907403, + "timestamp": "2025-10-01 03:27:08.382501", + "step": 4969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.413707", + "step": 4969, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002740953641477972, + "timestamp": "2025-10-01 03:27:08.416289", + "step": 4970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.447803", + "step": 4970, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001959148095920682, + "timestamp": "2025-10-01 03:27:08.450229", + "step": 4971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:08.481940", + "step": 4971, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006735817529261112, + "timestamp": "2025-10-01 03:27:08.505925", + "step": 4972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.536669", + "step": 4972, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030041637364774942, + "timestamp": "2025-10-01 03:27:08.538967", + "step": 4973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.569955", + "step": 4973, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02494579181075096, + "timestamp": "2025-10-01 03:27:08.572250", + "step": 4974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:08.603438", + "step": 4974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03369921073317528, + "timestamp": "2025-10-01 03:27:08.605930", + "step": 4975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.636499", + "step": 4975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008287516538985074, + "timestamp": "2025-10-01 03:27:08.660370", + "step": 4976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:08.692951", + "step": 4976, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00189091672655195, + "timestamp": "2025-10-01 03:27:08.695333", + "step": 4977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.726618", + "step": 4977, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010905207600444555, + "timestamp": "2025-10-01 03:27:08.729128", + "step": 4978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.760506", + "step": 4978, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00458510173484683, + "timestamp": "2025-10-01 03:27:08.762976", + "step": 4979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.794685", + "step": 4979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017661402001976967, + "timestamp": "2025-10-01 03:27:08.818602", + "step": 4980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.848962", + "step": 4980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044432171853259206, + "timestamp": "2025-10-01 03:27:08.851643", + "step": 4981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.882855", + "step": 4981, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06279333680868149, + "timestamp": "2025-10-01 03:27:08.885647", + "step": 4982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:08.916606", + "step": 4982, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026358243077993393, + "timestamp": "2025-10-01 03:27:08.919074", + "step": 4983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:08.949705", + "step": 4983, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013444015057757497, + "timestamp": "2025-10-01 03:27:08.973584", + "step": 4984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.004048", + "step": 4984, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004488146863877773, + "timestamp": "2025-10-01 03:27:09.006640", + "step": 4985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:09.039384", + "step": 4985, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021878727711737156, + "timestamp": "2025-10-01 03:27:09.045378", + "step": 4986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.076790", + "step": 4986, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.037158768624067307, + "timestamp": "2025-10-01 03:27:09.079147", + "step": 4987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.110806", + "step": 4987, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023514132481068373, + "timestamp": "2025-10-01 03:27:09.134743", + "step": 4988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:09.166836", + "step": 4988, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011381936259567738, + "timestamp": "2025-10-01 03:27:09.169417", + "step": 4989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.200499", + "step": 4989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028929519467055798, + "timestamp": "2025-10-01 03:27:09.202873", + "step": 4990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.237670", + "step": 4990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003703839029185474, + "timestamp": "2025-10-01 03:27:09.240162", + "step": 4991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:09.271122", + "step": 4991, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017099758610129356, + "timestamp": "2025-10-01 03:27:09.295050", + "step": 4992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.325163", + "step": 4992, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008138286648318172, + "timestamp": "2025-10-01 03:27:09.327519", + "step": 4993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.359136", + "step": 4993, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028068770188838243, + "timestamp": "2025-10-01 03:27:09.361622", + "step": 4994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.392886", + "step": 4994, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010663175489753485, + "timestamp": "2025-10-01 03:27:09.395958", + "step": 4995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.427034", + "step": 4995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013888709945604205, + "timestamp": "2025-10-01 03:27:09.450883", + "step": 4996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.481217", + "step": 4996, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003078560112044215, + "timestamp": "2025-10-01 03:27:09.483660", + "step": 4997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.513965", + "step": 4997, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007033192669041455, + "timestamp": "2025-10-01 03:27:09.516773", + "step": 4998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.547495", + "step": 4998, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005803682375699282, + "timestamp": "2025-10-01 03:27:09.549753", + "step": 4999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:09.580181", + "step": 4999, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0301644466817379, + "timestamp": "2025-10-01 03:27:09.604007", + "step": 5000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 5000", + "timestamp": "2025-10-01 03:27:14.278278", + "step": 5000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.318477", + "step": 5000, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014221572782844305, + "timestamp": "2025-10-01 03:27:14.320745", + "step": 5001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.358579", + "step": 5001, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010839112801477313, + "timestamp": "2025-10-01 03:27:14.360798", + "step": 5002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:14.393474", + "step": 5002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010643706656992435, + "timestamp": "2025-10-01 03:27:14.396020", + "step": 5003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.437873", + "step": 5003, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01934877224266529, + "timestamp": "2025-10-01 03:27:14.461819", + "step": 5004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.494875", + "step": 5004, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020661434158682823, + "timestamp": "2025-10-01 03:27:14.498512", + "step": 5005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.530422", + "step": 5005, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002896282821893692, + "timestamp": "2025-10-01 03:27:14.532629", + "step": 5006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.571105", + "step": 5006, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029078894294798374, + "timestamp": "2025-10-01 03:27:14.573570", + "step": 5007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.618010", + "step": 5007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005119448062032461, + "timestamp": "2025-10-01 03:27:14.641971", + "step": 5008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:14.673469", + "step": 5008, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004693463444709778, + "timestamp": "2025-10-01 03:27:14.675825", + "step": 5009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.712231", + "step": 5009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00360560929402709, + "timestamp": "2025-10-01 03:27:14.714643", + "step": 5010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:14.755369", + "step": 5010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006336073391139507, + "timestamp": "2025-10-01 03:27:14.757649", + "step": 5011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.795055", + "step": 5011, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006479181814938784, + "timestamp": "2025-10-01 03:27:14.819388", + "step": 5012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.851924", + "step": 5012, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010129333473742008, + "timestamp": "2025-10-01 03:27:14.858431", + "step": 5013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.889890", + "step": 5013, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006118377787061036, + "timestamp": "2025-10-01 03:27:14.892313", + "step": 5014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.924895", + "step": 5014, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010069304844364524, + "timestamp": "2025-10-01 03:27:14.927136", + "step": 5015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:14.958297", + "step": 5015, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01545338798314333, + "timestamp": "2025-10-01 03:27:14.982134", + "step": 5016, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:27:17.845730", + "step": 5016, + "epoch": 3 + }, + { + "type": "pplx", + "content": 1791683.787394418, + "timestamp": "2025-10-01 03:27:17.850714", + "step": 5016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:17.897687", + "step": 5016, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018604248762130737, + "timestamp": "2025-10-01 03:27:17.903616", + "step": 5017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:17.942444", + "step": 5017, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027935043908655643, + "timestamp": "2025-10-01 03:27:17.946208", + "step": 5018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.001748", + "step": 5018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010647125309333205, + "timestamp": "2025-10-01 03:27:18.011511", + "step": 5019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.049832", + "step": 5019, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011252948315814137, + "timestamp": "2025-10-01 03:27:18.080882", + "step": 5020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.127906", + "step": 5020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01638566330075264, + "timestamp": "2025-10-01 03:27:18.136143", + "step": 5021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.191667", + "step": 5021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025012462865561247, + "timestamp": "2025-10-01 03:27:18.198023", + "step": 5022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:18.234934", + "step": 5022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017198451096192002, + "timestamp": "2025-10-01 03:27:18.238581", + "step": 5023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.276659", + "step": 5023, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004202567506581545, + "timestamp": "2025-10-01 03:27:18.304037", + "step": 5024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.347031", + "step": 5024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009491393342614174, + "timestamp": "2025-10-01 03:27:18.353735", + "step": 5025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.401291", + "step": 5025, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004858844913542271, + "timestamp": "2025-10-01 03:27:18.408643", + "step": 5026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:18.448747", + "step": 5026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011130036000395194, + "timestamp": "2025-10-01 03:27:18.456799", + "step": 5027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.504901", + "step": 5027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006912060081958771, + "timestamp": "2025-10-01 03:27:18.532986", + "step": 5028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:18.575505", + "step": 5028, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008404338150285184, + "timestamp": "2025-10-01 03:27:18.584531", + "step": 5029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:18.626576", + "step": 5029, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007116858614608645, + "timestamp": "2025-10-01 03:27:18.635868", + "step": 5030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.686844", + "step": 5030, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016729866911191493, + "timestamp": "2025-10-01 03:27:18.693382", + "step": 5031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.733378", + "step": 5031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007244686712510884, + "timestamp": "2025-10-01 03:27:18.762575", + "step": 5032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.808296", + "step": 5032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029826356330886483, + "timestamp": "2025-10-01 03:27:18.815075", + "step": 5033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.851280", + "step": 5033, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005248495144769549, + "timestamp": "2025-10-01 03:27:18.859437", + "step": 5034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.898160", + "step": 5034, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000234507693676278, + "timestamp": "2025-10-01 03:27:18.905179", + "step": 5035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:18.956541", + "step": 5035, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001319164759479463, + "timestamp": "2025-10-01 03:27:18.987791", + "step": 5036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:19.022888", + "step": 5036, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00224702013656497, + "timestamp": "2025-10-01 03:27:19.028649", + "step": 5037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:19.088457", + "step": 5037, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06625332683324814, + "timestamp": "2025-10-01 03:27:19.095528", + "step": 5038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.139992", + "step": 5038, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013869233953300864, + "timestamp": "2025-10-01 03:27:19.150656", + "step": 5039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:19.204868", + "step": 5039, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030927377520129085, + "timestamp": "2025-10-01 03:27:19.238532", + "step": 5040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:19.298279", + "step": 5040, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008779219351708889, + "timestamp": "2025-10-01 03:27:19.308032", + "step": 5041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.353035", + "step": 5041, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006712921313010156, + "timestamp": "2025-10-01 03:27:19.371193", + "step": 5042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.424991", + "step": 5042, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004044482484459877, + "timestamp": "2025-10-01 03:27:19.430777", + "step": 5043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:19.470823", + "step": 5043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0072203585878014565, + "timestamp": "2025-10-01 03:27:19.500636", + "step": 5044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.575560", + "step": 5044, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013611512258648872, + "timestamp": "2025-10-01 03:27:19.585074", + "step": 5045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.629138", + "step": 5045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012702211970463395, + "timestamp": "2025-10-01 03:27:19.638689", + "step": 5046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.693980", + "step": 5046, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021705201652366668, + "timestamp": "2025-10-01 03:27:19.702693", + "step": 5047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.751496", + "step": 5047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002469139639288187, + "timestamp": "2025-10-01 03:27:19.785550", + "step": 5048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.834859", + "step": 5048, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018996965081896633, + "timestamp": "2025-10-01 03:27:19.847266", + "step": 5049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.888110", + "step": 5049, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002747912076301873, + "timestamp": "2025-10-01 03:27:19.898876", + "step": 5050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.939377", + "step": 5050, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012939995212946087, + "timestamp": "2025-10-01 03:27:19.950325", + "step": 5051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:19.991361", + "step": 5051, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003645049291662872, + "timestamp": "2025-10-01 03:27:20.021607", + "step": 5052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:20.058259", + "step": 5052, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012930564116686583, + "timestamp": "2025-10-01 03:27:20.071342", + "step": 5053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.128484", + "step": 5053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043644403922371566, + "timestamp": "2025-10-01 03:27:20.138861", + "step": 5054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.184211", + "step": 5054, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002445190621074289, + "timestamp": "2025-10-01 03:27:20.196840", + "step": 5055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:20.241691", + "step": 5055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011973767686868086, + "timestamp": "2025-10-01 03:27:20.275035", + "step": 5056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.316024", + "step": 5056, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005771873984485865, + "timestamp": "2025-10-01 03:27:20.325838", + "step": 5057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.370192", + "step": 5057, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005199818988330662, + "timestamp": "2025-10-01 03:27:20.377805", + "step": 5058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.420575", + "step": 5058, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010282353032380342, + "timestamp": "2025-10-01 03:27:20.424653", + "step": 5059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.460536", + "step": 5059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010168671142309904, + "timestamp": "2025-10-01 03:27:20.491523", + "step": 5060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.531944", + "step": 5060, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002135695976903662, + "timestamp": "2025-10-01 03:27:20.542635", + "step": 5061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.596109", + "step": 5061, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010028394171968102, + "timestamp": "2025-10-01 03:27:20.601213", + "step": 5062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.648718", + "step": 5062, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013909416738897562, + "timestamp": "2025-10-01 03:27:20.668392", + "step": 5063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.711996", + "step": 5063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010382368782302365, + "timestamp": "2025-10-01 03:27:20.738155", + "step": 5064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.792174", + "step": 5064, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022659823298454285, + "timestamp": "2025-10-01 03:27:20.798135", + "step": 5065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.853929", + "step": 5065, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003915857814718038, + "timestamp": "2025-10-01 03:27:20.863047", + "step": 5066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.912015", + "step": 5066, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008134839008562267, + "timestamp": "2025-10-01 03:27:20.927751", + "step": 5067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:20.978666", + "step": 5067, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026423588395118713, + "timestamp": "2025-10-01 03:27:21.007878", + "step": 5068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:21.051723", + "step": 5068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005316815222613513, + "timestamp": "2025-10-01 03:27:21.055161", + "step": 5069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.106685", + "step": 5069, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025442696642130613, + "timestamp": "2025-10-01 03:27:21.119815", + "step": 5070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.164352", + "step": 5070, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032054597977548838, + "timestamp": "2025-10-01 03:27:21.167468", + "step": 5071, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.210099", + "step": 5071, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008023085072636604, + "timestamp": "2025-10-01 03:27:21.242809", + "step": 5072, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.285573", + "step": 5072, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045440721441991627, + "timestamp": "2025-10-01 03:27:21.289474", + "step": 5073, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:21.324597", + "step": 5073, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002286948263645172, + "timestamp": "2025-10-01 03:27:21.336042", + "step": 5074, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.370615", + "step": 5074, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.038845229893922806, + "timestamp": "2025-10-01 03:27:21.379344", + "step": 5075, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.423777", + "step": 5075, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002191944047808647, + "timestamp": "2025-10-01 03:27:21.450096", + "step": 5076, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.491680", + "step": 5076, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000971935864072293, + "timestamp": "2025-10-01 03:27:21.494538", + "step": 5077, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.544326", + "step": 5077, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015154435532167554, + "timestamp": "2025-10-01 03:27:21.552177", + "step": 5078, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.594754", + "step": 5078, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02748478576540947, + "timestamp": "2025-10-01 03:27:21.602618", + "step": 5079, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.660912", + "step": 5079, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007205713773146272, + "timestamp": "2025-10-01 03:27:21.691776", + "step": 5080, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.737310", + "step": 5080, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003075872955378145, + "timestamp": "2025-10-01 03:27:21.748427", + "step": 5081, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.784942", + "step": 5081, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003415877290535718, + "timestamp": "2025-10-01 03:27:21.788688", + "step": 5082, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.840028", + "step": 5082, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01920855976641178, + "timestamp": "2025-10-01 03:27:21.843560", + "step": 5083, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.881824", + "step": 5083, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002541638386901468, + "timestamp": "2025-10-01 03:27:21.909836", + "step": 5084, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:21.944966", + "step": 5084, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001214299933053553, + "timestamp": "2025-10-01 03:27:21.948120", + "step": 5085, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:21.987556", + "step": 5085, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010467985412105918, + "timestamp": "2025-10-01 03:27:21.991821", + "step": 5086, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:22.038367", + "step": 5086, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038063216488808393, + "timestamp": "2025-10-01 03:27:22.050169", + "step": 5087, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.085189", + "step": 5087, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037484115455299616, + "timestamp": "2025-10-01 03:27:22.111451", + "step": 5088, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.150227", + "step": 5088, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008039521053433418, + "timestamp": "2025-10-01 03:27:22.155602", + "step": 5089, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.190780", + "step": 5089, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006579491309821606, + "timestamp": "2025-10-01 03:27:22.203751", + "step": 5090, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.243211", + "step": 5090, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002507316181436181, + "timestamp": "2025-10-01 03:27:22.255483", + "step": 5091, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.292322", + "step": 5091, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0495547391474247, + "timestamp": "2025-10-01 03:27:22.324528", + "step": 5092, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:22.397109", + "step": 5092, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002638453443069011, + "timestamp": "2025-10-01 03:27:22.412028", + "step": 5093, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.454792", + "step": 5093, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.329206709982827e-05, + "timestamp": "2025-10-01 03:27:22.464767", + "step": 5094, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.504496", + "step": 5094, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028102321084588766, + "timestamp": "2025-10-01 03:27:22.513459", + "step": 5095, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.579842", + "step": 5095, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04858572408556938, + "timestamp": "2025-10-01 03:27:22.612541", + "step": 5096, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:22.661949", + "step": 5096, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06532160937786102, + "timestamp": "2025-10-01 03:27:22.671353", + "step": 5097, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.732648", + "step": 5097, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013345970772206783, + "timestamp": "2025-10-01 03:27:22.742209", + "step": 5098, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:22.781962", + "step": 5098, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004412249196320772, + "timestamp": "2025-10-01 03:27:22.791008", + "step": 5099, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.839884", + "step": 5099, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009544230415485799, + "timestamp": "2025-10-01 03:27:22.870261", + "step": 5100, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:22.928588", + "step": 5100, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005729700904339552, + "timestamp": "2025-10-01 03:27:22.949088", + "step": 5101, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:22.999521", + "step": 5101, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008334302343428135, + "timestamp": "2025-10-01 03:27:23.012842", + "step": 5102, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.062680", + "step": 5102, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033053383231163025, + "timestamp": "2025-10-01 03:27:23.083021", + "step": 5103, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:23.143941", + "step": 5103, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048107016482390463, + "timestamp": "2025-10-01 03:27:23.176394", + "step": 5104, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.220876", + "step": 5104, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005930814892053604, + "timestamp": "2025-10-01 03:27:23.236820", + "step": 5105, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.283842", + "step": 5105, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002620472339913249, + "timestamp": "2025-10-01 03:27:23.318569", + "step": 5106, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:23.376785", + "step": 5106, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018980642198584974, + "timestamp": "2025-10-01 03:27:23.398452", + "step": 5107, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.439774", + "step": 5107, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011625501792877913, + "timestamp": "2025-10-01 03:27:23.481698", + "step": 5108, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:23.521443", + "step": 5108, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004130837507545948, + "timestamp": "2025-10-01 03:27:23.533007", + "step": 5109, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.591673", + "step": 5109, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002553019905462861, + "timestamp": "2025-10-01 03:27:23.596132", + "step": 5110, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.650008", + "step": 5110, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004686383996158838, + "timestamp": "2025-10-01 03:27:23.657109", + "step": 5111, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.697587", + "step": 5111, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019516689935699105, + "timestamp": "2025-10-01 03:27:23.722111", + "step": 5112, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.765312", + "step": 5112, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.306427091360092e-05, + "timestamp": "2025-10-01 03:27:23.768573", + "step": 5113, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.807413", + "step": 5113, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006136004347354174, + "timestamp": "2025-10-01 03:27:23.810402", + "step": 5114, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:23.847907", + "step": 5114, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0050107925198972225, + "timestamp": "2025-10-01 03:27:23.854250", + "step": 5115, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.906639", + "step": 5115, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.027830690145492554, + "timestamp": "2025-10-01 03:27:23.933651", + "step": 5116, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:23.971510", + "step": 5116, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007187852170318365, + "timestamp": "2025-10-01 03:27:23.978077", + "step": 5117, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.018052", + "step": 5117, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013639400713145733, + "timestamp": "2025-10-01 03:27:24.022100", + "step": 5118, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.061228", + "step": 5118, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017444057448301464, + "timestamp": "2025-10-01 03:27:24.065241", + "step": 5119, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:24.101028", + "step": 5119, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02796614170074463, + "timestamp": "2025-10-01 03:27:24.125608", + "step": 5120, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.164880", + "step": 5120, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005600228440016508, + "timestamp": "2025-10-01 03:27:24.167888", + "step": 5121, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.202950", + "step": 5121, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037662911927327514, + "timestamp": "2025-10-01 03:27:24.208455", + "step": 5122, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:24.247479", + "step": 5122, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021952826529741287, + "timestamp": "2025-10-01 03:27:24.255716", + "step": 5123, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.297664", + "step": 5123, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022510518319904804, + "timestamp": "2025-10-01 03:27:24.327912", + "step": 5124, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:24.364800", + "step": 5124, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.052036102861166, + "timestamp": "2025-10-01 03:27:24.369862", + "step": 5125, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.408397", + "step": 5125, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043146879761479795, + "timestamp": "2025-10-01 03:27:24.412447", + "step": 5126, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.462886", + "step": 5126, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007110378937795758, + "timestamp": "2025-10-01 03:27:24.467417", + "step": 5127, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.511264", + "step": 5127, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001074533094651997, + "timestamp": "2025-10-01 03:27:24.538629", + "step": 5128, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:24.580320", + "step": 5128, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015700358198955655, + "timestamp": "2025-10-01 03:27:24.586563", + "step": 5129, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.637371", + "step": 5129, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007460080087184906, + "timestamp": "2025-10-01 03:27:24.647148", + "step": 5130, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.685512", + "step": 5130, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034128674305975437, + "timestamp": "2025-10-01 03:27:24.689494", + "step": 5131, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.741647", + "step": 5131, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005004492122679949, + "timestamp": "2025-10-01 03:27:24.772912", + "step": 5132, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.812578", + "step": 5132, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018599387258291245, + "timestamp": "2025-10-01 03:27:24.817148", + "step": 5133, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.860559", + "step": 5133, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018399177119135857, + "timestamp": "2025-10-01 03:27:24.869750", + "step": 5134, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.911054", + "step": 5134, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02062077820301056, + "timestamp": "2025-10-01 03:27:24.919972", + "step": 5135, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:24.957230", + "step": 5135, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027105535264126956, + "timestamp": "2025-10-01 03:27:24.990683", + "step": 5136, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.033044", + "step": 5136, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000738164409995079, + "timestamp": "2025-10-01 03:27:25.046023", + "step": 5137, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:25.099628", + "step": 5137, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011577075347304344, + "timestamp": "2025-10-01 03:27:25.111534", + "step": 5138, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.152001", + "step": 5138, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002981076540891081, + "timestamp": "2025-10-01 03:27:25.162466", + "step": 5139, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.201533", + "step": 5139, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006777240778319538, + "timestamp": "2025-10-01 03:27:25.233497", + "step": 5140, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.274645", + "step": 5140, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003691131714731455, + "timestamp": "2025-10-01 03:27:25.288336", + "step": 5141, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.332168", + "step": 5141, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029405909590423107, + "timestamp": "2025-10-01 03:27:25.344109", + "step": 5142, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.387429", + "step": 5142, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007077937480062246, + "timestamp": "2025-10-01 03:27:25.398120", + "step": 5143, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.456440", + "step": 5143, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009480150416493416, + "timestamp": "2025-10-01 03:27:25.497251", + "step": 5144, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:25.546225", + "step": 5144, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025579825160093606, + "timestamp": "2025-10-01 03:27:25.553907", + "step": 5145, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.590789", + "step": 5145, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006254679057747126, + "timestamp": "2025-10-01 03:27:25.600495", + "step": 5146, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.654342", + "step": 5146, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.041946250945329666, + "timestamp": "2025-10-01 03:27:25.668710", + "step": 5147, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:25.716522", + "step": 5147, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003107046941295266, + "timestamp": "2025-10-01 03:27:25.743943", + "step": 5148, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:25.782169", + "step": 5148, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010626322589814663, + "timestamp": "2025-10-01 03:27:25.785576", + "step": 5149, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.825275", + "step": 5149, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013658223673701286, + "timestamp": "2025-10-01 03:27:25.830373", + "step": 5150, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.866231", + "step": 5150, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03985609486699104, + "timestamp": "2025-10-01 03:27:25.872041", + "step": 5151, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:25.909153", + "step": 5151, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005135187529958785, + "timestamp": "2025-10-01 03:27:25.947617", + "step": 5152, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:25.984343", + "step": 5152, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003254202427342534, + "timestamp": "2025-10-01 03:27:25.989295", + "step": 5153, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.025888", + "step": 5153, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03681738302111626, + "timestamp": "2025-10-01 03:27:26.031912", + "step": 5154, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.067608", + "step": 5154, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012572960695251822, + "timestamp": "2025-10-01 03:27:26.073326", + "step": 5155, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.110544", + "step": 5155, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02482718415558338, + "timestamp": "2025-10-01 03:27:26.138936", + "step": 5156, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:26.186276", + "step": 5156, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020925761200487614, + "timestamp": "2025-10-01 03:27:26.192428", + "step": 5157, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.232136", + "step": 5157, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003270341258030385, + "timestamp": "2025-10-01 03:27:26.239293", + "step": 5158, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.276289", + "step": 5158, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007729591452516615, + "timestamp": "2025-10-01 03:27:26.282570", + "step": 5159, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.321532", + "step": 5159, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008877341635525227, + "timestamp": "2025-10-01 03:27:26.350007", + "step": 5160, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.386602", + "step": 5160, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014320863410830498, + "timestamp": "2025-10-01 03:27:26.393043", + "step": 5161, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.436549", + "step": 5161, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011682066135108471, + "timestamp": "2025-10-01 03:27:26.443066", + "step": 5162, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.478588", + "step": 5162, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004601411346811801, + "timestamp": "2025-10-01 03:27:26.481446", + "step": 5163, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.518426", + "step": 5163, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004506581462919712, + "timestamp": "2025-10-01 03:27:26.548852", + "step": 5164, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.586053", + "step": 5164, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013681028503924608, + "timestamp": "2025-10-01 03:27:26.594234", + "step": 5165, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.638179", + "step": 5165, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06648745387792587, + "timestamp": "2025-10-01 03:27:26.641571", + "step": 5166, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:26.690145", + "step": 5166, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009341529570519924, + "timestamp": "2025-10-01 03:27:26.701623", + "step": 5167, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:26.747863", + "step": 5167, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016500920057296753, + "timestamp": "2025-10-01 03:27:26.773330", + "step": 5168, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:27:29.033664", + "step": 5168, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2028113.7337095519, + "timestamp": "2025-10-01 03:27:29.036374", + "step": 5168, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.065373", + "step": 5168, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004394799761939794, + "timestamp": "2025-10-01 03:27:29.068025", + "step": 5169, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.098809", + "step": 5169, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008374712197110057, + "timestamp": "2025-10-01 03:27:29.101306", + "step": 5170, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.133817", + "step": 5170, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00660217460244894, + "timestamp": "2025-10-01 03:27:29.136045", + "step": 5171, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.167789", + "step": 5171, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012282898649573326, + "timestamp": "2025-10-01 03:27:29.191873", + "step": 5172, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.222588", + "step": 5172, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028815693804062903, + "timestamp": "2025-10-01 03:27:29.225040", + "step": 5173, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.256532", + "step": 5173, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012587221572175622, + "timestamp": "2025-10-01 03:27:29.258961", + "step": 5174, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.289738", + "step": 5174, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009591369889676571, + "timestamp": "2025-10-01 03:27:29.292001", + "step": 5175, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.322432", + "step": 5175, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008188021602109075, + "timestamp": "2025-10-01 03:27:29.347152", + "step": 5176, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.377734", + "step": 5176, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01254944410175085, + "timestamp": "2025-10-01 03:27:29.380326", + "step": 5177, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.417973", + "step": 5177, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007429266814142466, + "timestamp": "2025-10-01 03:27:29.419963", + "step": 5178, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.450472", + "step": 5178, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008940002880990505, + "timestamp": "2025-10-01 03:27:29.452889", + "step": 5179, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.483667", + "step": 5179, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005555932875722647, + "timestamp": "2025-10-01 03:27:29.507431", + "step": 5180, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:29.538263", + "step": 5180, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005470696487464011, + "timestamp": "2025-10-01 03:27:29.541685", + "step": 5181, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.572217", + "step": 5181, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006819238420575857, + "timestamp": "2025-10-01 03:27:29.574695", + "step": 5182, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:29.605208", + "step": 5182, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015189099358394742, + "timestamp": "2025-10-01 03:27:29.607603", + "step": 5183, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.638770", + "step": 5183, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013027645647525787, + "timestamp": "2025-10-01 03:27:29.663352", + "step": 5184, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.694285", + "step": 5184, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015779304085299373, + "timestamp": "2025-10-01 03:27:29.696898", + "step": 5185, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:29.729211", + "step": 5185, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024818226229399443, + "timestamp": "2025-10-01 03:27:29.732010", + "step": 5186, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.777174", + "step": 5186, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036382299731485546, + "timestamp": "2025-10-01 03:27:29.780497", + "step": 5187, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.819881", + "step": 5187, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016134501493070275, + "timestamp": "2025-10-01 03:27:29.844357", + "step": 5188, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:29.884385", + "step": 5188, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015984226483851671, + "timestamp": "2025-10-01 03:27:29.889934", + "step": 5189, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.938266", + "step": 5189, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002478359965607524, + "timestamp": "2025-10-01 03:27:29.944524", + "step": 5190, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:29.977173", + "step": 5190, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036191122489981353, + "timestamp": "2025-10-01 03:27:29.980343", + "step": 5191, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.013071", + "step": 5191, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008382400847040117, + "timestamp": "2025-10-01 03:27:30.037513", + "step": 5192, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.069849", + "step": 5192, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011244445340707898, + "timestamp": "2025-10-01 03:27:30.073009", + "step": 5193, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:30.109500", + "step": 5193, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02117796055972576, + "timestamp": "2025-10-01 03:27:30.114499", + "step": 5194, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:30.164514", + "step": 5194, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004992929752916098, + "timestamp": "2025-10-01 03:27:30.167412", + "step": 5195, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.202443", + "step": 5195, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020640669390559196, + "timestamp": "2025-10-01 03:27:30.227157", + "step": 5196, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:30.259427", + "step": 5196, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016591299325227737, + "timestamp": "2025-10-01 03:27:30.265768", + "step": 5197, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.302421", + "step": 5197, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05185640975832939, + "timestamp": "2025-10-01 03:27:30.306759", + "step": 5198, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.339086", + "step": 5198, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01033956278115511, + "timestamp": "2025-10-01 03:27:30.350006", + "step": 5199, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.385880", + "step": 5199, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012594601139426231, + "timestamp": "2025-10-01 03:27:30.410953", + "step": 5200, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.454830", + "step": 5200, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012464222498238087, + "timestamp": "2025-10-01 03:27:30.458159", + "step": 5201, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.489684", + "step": 5201, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0144509207457304, + "timestamp": "2025-10-01 03:27:30.493249", + "step": 5202, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.533681", + "step": 5202, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001990407326957211, + "timestamp": "2025-10-01 03:27:30.536636", + "step": 5203, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:30.568372", + "step": 5203, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002930985065177083, + "timestamp": "2025-10-01 03:27:30.592589", + "step": 5204, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.626548", + "step": 5204, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005367274396121502, + "timestamp": "2025-10-01 03:27:30.629502", + "step": 5205, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.660942", + "step": 5205, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012451792135834694, + "timestamp": "2025-10-01 03:27:30.663817", + "step": 5206, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.696199", + "step": 5206, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00230482779443264, + "timestamp": "2025-10-01 03:27:30.698661", + "step": 5207, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.730477", + "step": 5207, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008381274528801441, + "timestamp": "2025-10-01 03:27:30.754146", + "step": 5208, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.786883", + "step": 5208, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018739309161901474, + "timestamp": "2025-10-01 03:27:30.792017", + "step": 5209, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.834683", + "step": 5209, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03917226195335388, + "timestamp": "2025-10-01 03:27:30.841474", + "step": 5210, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.875148", + "step": 5210, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014392008073627949, + "timestamp": "2025-10-01 03:27:30.878994", + "step": 5211, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:30.913051", + "step": 5211, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047439479385502636, + "timestamp": "2025-10-01 03:27:30.938631", + "step": 5212, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:30.971189", + "step": 5212, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.756734652910382e-05, + "timestamp": "2025-10-01 03:27:30.973627", + "step": 5213, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.008716", + "step": 5213, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014204782200977206, + "timestamp": "2025-10-01 03:27:31.013857", + "step": 5214, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.050777", + "step": 5214, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044224216253496706, + "timestamp": "2025-10-01 03:27:31.053151", + "step": 5215, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.086235", + "step": 5215, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017491650069132447, + "timestamp": "2025-10-01 03:27:31.110107", + "step": 5216, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:31.153257", + "step": 5216, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040336899110116065, + "timestamp": "2025-10-01 03:27:31.155662", + "step": 5217, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.188003", + "step": 5217, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05113130435347557, + "timestamp": "2025-10-01 03:27:31.190282", + "step": 5218, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.220984", + "step": 5218, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017951189074665308, + "timestamp": "2025-10-01 03:27:31.223493", + "step": 5219, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.255131", + "step": 5219, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001801609992980957, + "timestamp": "2025-10-01 03:27:31.279348", + "step": 5220, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.311304", + "step": 5220, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001257885742234066, + "timestamp": "2025-10-01 03:27:31.313792", + "step": 5221, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.344384", + "step": 5221, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008343958295881748, + "timestamp": "2025-10-01 03:27:31.346634", + "step": 5222, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.378797", + "step": 5222, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0073517682030797005, + "timestamp": "2025-10-01 03:27:31.381176", + "step": 5223, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.412044", + "step": 5223, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009193417616188526, + "timestamp": "2025-10-01 03:27:31.435803", + "step": 5224, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.467245", + "step": 5224, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004602374974638224, + "timestamp": "2025-10-01 03:27:31.470251", + "step": 5225, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.504474", + "step": 5225, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031713012140244246, + "timestamp": "2025-10-01 03:27:31.508084", + "step": 5226, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:31.538506", + "step": 5226, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034046124201267958, + "timestamp": "2025-10-01 03:27:31.544225", + "step": 5227, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.575330", + "step": 5227, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0056508746929466724, + "timestamp": "2025-10-01 03:27:31.599779", + "step": 5228, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.630396", + "step": 5228, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017909424379467964, + "timestamp": "2025-10-01 03:27:31.632756", + "step": 5229, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.663544", + "step": 5229, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030676633468829095, + "timestamp": "2025-10-01 03:27:31.665868", + "step": 5230, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.696467", + "step": 5230, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013325146574061364, + "timestamp": "2025-10-01 03:27:31.699241", + "step": 5231, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.731445", + "step": 5231, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014555481902789325, + "timestamp": "2025-10-01 03:27:31.756122", + "step": 5232, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.786956", + "step": 5232, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029851889121346176, + "timestamp": "2025-10-01 03:27:31.789197", + "step": 5233, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.823720", + "step": 5233, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013440418988466263, + "timestamp": "2025-10-01 03:27:31.826255", + "step": 5234, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:31.856813", + "step": 5234, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.768560731667094e-05, + "timestamp": "2025-10-01 03:27:31.859150", + "step": 5235, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.889571", + "step": 5235, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008356157690286636, + "timestamp": "2025-10-01 03:27:31.913495", + "step": 5236, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.944809", + "step": 5236, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0058661652728915215, + "timestamp": "2025-10-01 03:27:31.948500", + "step": 5237, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:31.983483", + "step": 5237, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002323797205463052, + "timestamp": "2025-10-01 03:27:31.989607", + "step": 5238, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.024979", + "step": 5238, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019894420984201133, + "timestamp": "2025-10-01 03:27:32.027368", + "step": 5239, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.059284", + "step": 5239, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005621521850116551, + "timestamp": "2025-10-01 03:27:32.100385", + "step": 5240, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:32.130915", + "step": 5240, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003985036630183458, + "timestamp": "2025-10-01 03:27:32.133344", + "step": 5241, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.164778", + "step": 5241, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01844347082078457, + "timestamp": "2025-10-01 03:27:32.168242", + "step": 5242, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.200050", + "step": 5242, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01689941994845867, + "timestamp": "2025-10-01 03:27:32.202573", + "step": 5243, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.234331", + "step": 5243, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00376015598885715, + "timestamp": "2025-10-01 03:27:32.258071", + "step": 5244, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.289194", + "step": 5244, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.865729225566611e-05, + "timestamp": "2025-10-01 03:27:32.291780", + "step": 5245, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.324065", + "step": 5245, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021563985501416028, + "timestamp": "2025-10-01 03:27:32.327051", + "step": 5246, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.357681", + "step": 5246, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010745901381596923, + "timestamp": "2025-10-01 03:27:32.359983", + "step": 5247, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.390525", + "step": 5247, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004080051556229591, + "timestamp": "2025-10-01 03:27:32.414387", + "step": 5248, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.447980", + "step": 5248, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006325212307274342, + "timestamp": "2025-10-01 03:27:32.451101", + "step": 5249, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.482752", + "step": 5249, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004307362833060324, + "timestamp": "2025-10-01 03:27:32.485258", + "step": 5250, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.516202", + "step": 5250, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001888294646050781, + "timestamp": "2025-10-01 03:27:32.518414", + "step": 5251, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.549019", + "step": 5251, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010685178858693689, + "timestamp": "2025-10-01 03:27:32.573074", + "step": 5252, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.607262", + "step": 5252, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027621598565019667, + "timestamp": "2025-10-01 03:27:32.610251", + "step": 5253, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.641577", + "step": 5253, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011477385414764285, + "timestamp": "2025-10-01 03:27:32.643934", + "step": 5254, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.675626", + "step": 5254, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025020597968250513, + "timestamp": "2025-10-01 03:27:32.678010", + "step": 5255, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.709313", + "step": 5255, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012193126894999295, + "timestamp": "2025-10-01 03:27:32.733597", + "step": 5256, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:32.764164", + "step": 5256, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001664362265728414, + "timestamp": "2025-10-01 03:27:32.766819", + "step": 5257, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.797283", + "step": 5257, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024755721096880734, + "timestamp": "2025-10-01 03:27:32.799569", + "step": 5258, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.830511", + "step": 5258, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019160223891958594, + "timestamp": "2025-10-01 03:27:32.837682", + "step": 5259, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:32.869280", + "step": 5259, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007028497755527496, + "timestamp": "2025-10-01 03:27:32.892963", + "step": 5260, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:32.928870", + "step": 5260, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024410874175373465, + "timestamp": "2025-10-01 03:27:32.931110", + "step": 5261, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:32.961780", + "step": 5261, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010292568913428113, + "timestamp": "2025-10-01 03:27:32.963940", + "step": 5262, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:32.994312", + "step": 5262, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035111315082758665, + "timestamp": "2025-10-01 03:27:32.996810", + "step": 5263, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.027277", + "step": 5263, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002655611839145422, + "timestamp": "2025-10-01 03:27:33.050975", + "step": 5264, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:33.084593", + "step": 5264, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.841927097411826e-05, + "timestamp": "2025-10-01 03:27:33.086933", + "step": 5265, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:33.121547", + "step": 5265, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034219081862829626, + "timestamp": "2025-10-01 03:27:33.123784", + "step": 5266, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:33.155239", + "step": 5266, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00525332149118185, + "timestamp": "2025-10-01 03:27:33.157607", + "step": 5267, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.188802", + "step": 5267, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.794403402134776e-05, + "timestamp": "2025-10-01 03:27:33.213528", + "step": 5268, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.250708", + "step": 5268, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.723894668743014e-05, + "timestamp": "2025-10-01 03:27:33.253122", + "step": 5269, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.285537", + "step": 5269, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004053864977322519, + "timestamp": "2025-10-01 03:27:33.287825", + "step": 5270, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.318471", + "step": 5270, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018320402596145868, + "timestamp": "2025-10-01 03:27:33.320878", + "step": 5271, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.351881", + "step": 5271, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020925819408148527, + "timestamp": "2025-10-01 03:27:33.377631", + "step": 5272, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.413787", + "step": 5272, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004571461118757725, + "timestamp": "2025-10-01 03:27:33.416007", + "step": 5273, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.454783", + "step": 5273, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005177843850106001, + "timestamp": "2025-10-01 03:27:33.456675", + "step": 5274, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.493999", + "step": 5274, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012653259909711778, + "timestamp": "2025-10-01 03:27:33.496378", + "step": 5275, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.526910", + "step": 5275, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006454840302467346, + "timestamp": "2025-10-01 03:27:33.550722", + "step": 5276, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:27:33.581238", + "step": 5276, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01657613180577755, + "timestamp": "2025-10-01 03:27:33.583549", + "step": 5277, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.615221", + "step": 5277, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012150687689427286, + "timestamp": "2025-10-01 03:27:33.618748", + "step": 5278, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.655591", + "step": 5278, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003848227206617594, + "timestamp": "2025-10-01 03:27:33.658284", + "step": 5279, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.693202", + "step": 5279, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004107422661036253, + "timestamp": "2025-10-01 03:27:33.719623", + "step": 5280, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.749836", + "step": 5280, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00436131376773119, + "timestamp": "2025-10-01 03:27:33.752632", + "step": 5281, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.785292", + "step": 5281, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05475696548819542, + "timestamp": "2025-10-01 03:27:33.787599", + "step": 5282, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:33.827049", + "step": 5282, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0430518202483654, + "timestamp": "2025-10-01 03:27:33.832829", + "step": 5283, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.867124", + "step": 5283, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006727274158038199, + "timestamp": "2025-10-01 03:27:33.891386", + "step": 5284, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:33.923308", + "step": 5284, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.3968397221760824e-05, + "timestamp": "2025-10-01 03:27:33.926357", + "step": 5285, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:33.957823", + "step": 5285, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.843126175226644e-05, + "timestamp": "2025-10-01 03:27:33.960650", + "step": 5286, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:33.995225", + "step": 5286, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009468362666666508, + "timestamp": "2025-10-01 03:27:33.997880", + "step": 5287, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.041835", + "step": 5287, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001925126853166148, + "timestamp": "2025-10-01 03:27:34.065748", + "step": 5288, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.101008", + "step": 5288, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012630678247660398, + "timestamp": "2025-10-01 03:27:34.103165", + "step": 5289, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.133564", + "step": 5289, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013029914407525212, + "timestamp": "2025-10-01 03:27:34.135720", + "step": 5290, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.170663", + "step": 5290, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013374898117035627, + "timestamp": "2025-10-01 03:27:34.173831", + "step": 5291, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.206929", + "step": 5291, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023058701772242785, + "timestamp": "2025-10-01 03:27:34.231106", + "step": 5292, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:34.263798", + "step": 5292, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033050477504730225, + "timestamp": "2025-10-01 03:27:34.266282", + "step": 5293, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:34.296753", + "step": 5293, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015877907571848482, + "timestamp": "2025-10-01 03:27:34.299457", + "step": 5294, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.335853", + "step": 5294, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009361408301629126, + "timestamp": "2025-10-01 03:27:34.338087", + "step": 5295, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.368986", + "step": 5295, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03220328316092491, + "timestamp": "2025-10-01 03:27:34.394227", + "step": 5296, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.425045", + "step": 5296, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005540590500459075, + "timestamp": "2025-10-01 03:27:34.427184", + "step": 5297, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.458026", + "step": 5297, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005435045808553696, + "timestamp": "2025-10-01 03:27:34.461132", + "step": 5298, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:34.493974", + "step": 5298, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024606825783848763, + "timestamp": "2025-10-01 03:27:34.496570", + "step": 5299, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.535147", + "step": 5299, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.694030450191349e-05, + "timestamp": "2025-10-01 03:27:34.562491", + "step": 5300, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.593927", + "step": 5300, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.164163591573015e-05, + "timestamp": "2025-10-01 03:27:34.596530", + "step": 5301, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.627022", + "step": 5301, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011228054063394666, + "timestamp": "2025-10-01 03:27:34.629321", + "step": 5302, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.668123", + "step": 5302, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034865993075072765, + "timestamp": "2025-10-01 03:27:34.673814", + "step": 5303, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.707081", + "step": 5303, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007339764386415482, + "timestamp": "2025-10-01 03:27:34.730836", + "step": 5304, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.770238", + "step": 5304, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019286475435364991, + "timestamp": "2025-10-01 03:27:34.772676", + "step": 5305, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.803647", + "step": 5305, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006679811631329358, + "timestamp": "2025-10-01 03:27:34.806297", + "step": 5306, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:34.845832", + "step": 5306, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006305622402578592, + "timestamp": "2025-10-01 03:27:34.848017", + "step": 5307, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.879140", + "step": 5307, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003130224358756095, + "timestamp": "2025-10-01 03:27:34.905905", + "step": 5308, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.940543", + "step": 5308, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.022614979883656e-05, + "timestamp": "2025-10-01 03:27:34.942997", + "step": 5309, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:34.985445", + "step": 5309, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003261526580899954, + "timestamp": "2025-10-01 03:27:34.988009", + "step": 5310, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:35.019725", + "step": 5310, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.253469811985269e-05, + "timestamp": "2025-10-01 03:27:35.022111", + "step": 5311, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:35.053298", + "step": 5311, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019450264517217875, + "timestamp": "2025-10-01 03:27:35.077015", + "step": 5312, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:35.108355", + "step": 5312, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036065487074665725, + "timestamp": "2025-10-01 03:27:35.113014", + "step": 5313, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:35.146670", + "step": 5313, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024655256420373917, + "timestamp": "2025-10-01 03:27:35.149069", + "step": 5314, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:35.180479", + "step": 5314, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035129362368024886, + "timestamp": "2025-10-01 03:27:35.182991", + "step": 5315, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:35.215222", + "step": 5315, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002788689685985446, + "timestamp": "2025-10-01 03:27:35.239031", + "step": 5316, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:35.277197", + "step": 5316, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02207810990512371, + "timestamp": "2025-10-01 03:27:35.279752", + "step": 5317, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:35.310484", + "step": 5317, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.461644418071955e-05, + "timestamp": "2025-10-01 03:27:35.312886", + "step": 5318, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:35.343772", + "step": 5318, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.632901593344286e-05, + "timestamp": "2025-10-01 03:27:35.348219", + "step": 5319, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:35.378913", + "step": 5319, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04523327574133873, + "timestamp": "2025-10-01 03:27:35.402951", + "step": 5320, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:27:37.549618", + "step": 5320, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2347593.711840887, + "timestamp": "2025-10-01 03:27:37.553589", + "step": 5320, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:37.583830", + "step": 5320, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00207938882522285, + "timestamp": "2025-10-01 03:27:37.587256", + "step": 5321, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:37.618191", + "step": 5321, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002443920820951462, + "timestamp": "2025-10-01 03:27:37.620866", + "step": 5322, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:37.652098", + "step": 5322, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016438014572486281, + "timestamp": "2025-10-01 03:27:37.655274", + "step": 5323, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:37.686662", + "step": 5323, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.023495802655816078, + "timestamp": "2025-10-01 03:27:37.711101", + "step": 5324, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:37.743545", + "step": 5324, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.061162274330854416, + "timestamp": "2025-10-01 03:27:37.746175", + "step": 5325, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:37.777951", + "step": 5325, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006566951517015696, + "timestamp": "2025-10-01 03:27:37.780802", + "step": 5326, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:37.812226", + "step": 5326, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024223608488682657, + "timestamp": "2025-10-01 03:27:37.815137", + "step": 5327, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:37.846670", + "step": 5327, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04716891795396805, + "timestamp": "2025-10-01 03:27:37.873042", + "step": 5328, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:37.906213", + "step": 5328, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019702213467098773, + "timestamp": "2025-10-01 03:27:37.909102", + "step": 5329, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:37.941984", + "step": 5329, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001565483515150845, + "timestamp": "2025-10-01 03:27:37.945174", + "step": 5330, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:37.975867", + "step": 5330, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0068129198625683784, + "timestamp": "2025-10-01 03:27:37.978877", + "step": 5331, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.010319", + "step": 5331, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008904381655156612, + "timestamp": "2025-10-01 03:27:38.035014", + "step": 5332, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.066790", + "step": 5332, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045083541772328317, + "timestamp": "2025-10-01 03:27:38.069750", + "step": 5333, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.104859", + "step": 5333, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005596106639131904, + "timestamp": "2025-10-01 03:27:38.107932", + "step": 5334, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.138894", + "step": 5334, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01053521130234003, + "timestamp": "2025-10-01 03:27:38.141880", + "step": 5335, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.173041", + "step": 5335, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004192198975943029, + "timestamp": "2025-10-01 03:27:38.197704", + "step": 5336, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.229632", + "step": 5336, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021768203005194664, + "timestamp": "2025-10-01 03:27:38.233384", + "step": 5337, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.263946", + "step": 5337, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03711297735571861, + "timestamp": "2025-10-01 03:27:38.267049", + "step": 5338, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.298859", + "step": 5338, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006463281461037695, + "timestamp": "2025-10-01 03:27:38.302093", + "step": 5339, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:38.333018", + "step": 5339, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013627425767481327, + "timestamp": "2025-10-01 03:27:38.357133", + "step": 5340, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.388175", + "step": 5340, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018290430307388306, + "timestamp": "2025-10-01 03:27:38.390768", + "step": 5341, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.421291", + "step": 5341, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006016019033268094, + "timestamp": "2025-10-01 03:27:38.428385", + "step": 5342, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.469084", + "step": 5342, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01896066591143608, + "timestamp": "2025-10-01 03:27:38.472467", + "step": 5343, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.505025", + "step": 5343, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030543224420398474, + "timestamp": "2025-10-01 03:27:38.529231", + "step": 5344, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.562046", + "step": 5344, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022252954076975584, + "timestamp": "2025-10-01 03:27:38.564024", + "step": 5345, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.594136", + "step": 5345, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006900667678564787, + "timestamp": "2025-10-01 03:27:38.596604", + "step": 5346, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.626696", + "step": 5346, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022441542241722345, + "timestamp": "2025-10-01 03:27:38.628930", + "step": 5347, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:38.661632", + "step": 5347, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001480720384279266, + "timestamp": "2025-10-01 03:27:38.685194", + "step": 5348, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.715714", + "step": 5348, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012369928881525993, + "timestamp": "2025-10-01 03:27:38.717806", + "step": 5349, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:38.747875", + "step": 5349, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019003673223778605, + "timestamp": "2025-10-01 03:27:38.750106", + "step": 5350, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.780254", + "step": 5350, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015075383998919278, + "timestamp": "2025-10-01 03:27:38.782544", + "step": 5351, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:38.812782", + "step": 5351, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0325605683028698, + "timestamp": "2025-10-01 03:27:38.836901", + "step": 5352, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:38.868995", + "step": 5352, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00437958724796772, + "timestamp": "2025-10-01 03:27:38.871636", + "step": 5353, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.902792", + "step": 5353, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003177281701937318, + "timestamp": "2025-10-01 03:27:38.905160", + "step": 5354, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:38.936748", + "step": 5354, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001487177680246532, + "timestamp": "2025-10-01 03:27:38.938982", + "step": 5355, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:38.976594", + "step": 5355, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006859270506538451, + "timestamp": "2025-10-01 03:27:39.000231", + "step": 5356, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:39.030896", + "step": 5356, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003014972433447838, + "timestamp": "2025-10-01 03:27:39.033908", + "step": 5357, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.067203", + "step": 5357, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.023369843140244484, + "timestamp": "2025-10-01 03:27:39.069872", + "step": 5358, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.102416", + "step": 5358, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007018434349447489, + "timestamp": "2025-10-01 03:27:39.104635", + "step": 5359, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:39.136155", + "step": 5359, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007342284661717713, + "timestamp": "2025-10-01 03:27:39.160013", + "step": 5360, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.190883", + "step": 5360, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02955835685133934, + "timestamp": "2025-10-01 03:27:39.193107", + "step": 5361, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.223615", + "step": 5361, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005284816725179553, + "timestamp": "2025-10-01 03:27:39.225941", + "step": 5362, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.256914", + "step": 5362, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002857560757547617, + "timestamp": "2025-10-01 03:27:39.259150", + "step": 5363, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:39.290407", + "step": 5363, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01496773213148117, + "timestamp": "2025-10-01 03:27:39.314097", + "step": 5364, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.347467", + "step": 5364, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.037614114582538605, + "timestamp": "2025-10-01 03:27:39.349667", + "step": 5365, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.380316", + "step": 5365, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011462775059044361, + "timestamp": "2025-10-01 03:27:39.382707", + "step": 5366, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.415565", + "step": 5366, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001219974015839398, + "timestamp": "2025-10-01 03:27:39.418463", + "step": 5367, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.449840", + "step": 5367, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021377133671194315, + "timestamp": "2025-10-01 03:27:39.473592", + "step": 5368, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:39.504772", + "step": 5368, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033515074755996466, + "timestamp": "2025-10-01 03:27:39.507001", + "step": 5369, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.537087", + "step": 5369, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010115314507856965, + "timestamp": "2025-10-01 03:27:39.539230", + "step": 5370, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.572257", + "step": 5370, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009291781461797655, + "timestamp": "2025-10-01 03:27:39.574357", + "step": 5371, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.604583", + "step": 5371, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010340800508856773, + "timestamp": "2025-10-01 03:27:39.628437", + "step": 5372, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.659785", + "step": 5372, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003876728005707264, + "timestamp": "2025-10-01 03:27:39.661946", + "step": 5373, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.692868", + "step": 5373, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000392202811781317, + "timestamp": "2025-10-01 03:27:39.695086", + "step": 5374, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.725733", + "step": 5374, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012448660098016262, + "timestamp": "2025-10-01 03:27:39.728042", + "step": 5375, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:39.759278", + "step": 5375, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018472161144018173, + "timestamp": "2025-10-01 03:27:39.783876", + "step": 5376, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:39.825130", + "step": 5376, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002666983986273408, + "timestamp": "2025-10-01 03:27:39.827252", + "step": 5377, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.859068", + "step": 5377, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003722157794982195, + "timestamp": "2025-10-01 03:27:39.861355", + "step": 5378, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.900372", + "step": 5378, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004546397365629673, + "timestamp": "2025-10-01 03:27:39.906927", + "step": 5379, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.937399", + "step": 5379, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016258491668850183, + "timestamp": "2025-10-01 03:27:39.961148", + "step": 5380, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:39.992664", + "step": 5380, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006570596597157419, + "timestamp": "2025-10-01 03:27:39.995031", + "step": 5381, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:40.027774", + "step": 5381, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008282664348371327, + "timestamp": "2025-10-01 03:27:40.030144", + "step": 5382, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:40.060324", + "step": 5382, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002573660749476403, + "timestamp": "2025-10-01 03:27:40.062792", + "step": 5383, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:40.093734", + "step": 5383, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004443868820089847, + "timestamp": "2025-10-01 03:27:40.117542", + "step": 5384, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.147845", + "step": 5384, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015286322450265288, + "timestamp": "2025-10-01 03:27:40.150373", + "step": 5385, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:40.181318", + "step": 5385, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0073075927793979645, + "timestamp": "2025-10-01 03:27:40.183525", + "step": 5386, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.214421", + "step": 5386, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032499132212251425, + "timestamp": "2025-10-01 03:27:40.216860", + "step": 5387, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.247984", + "step": 5387, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0059063308872282505, + "timestamp": "2025-10-01 03:27:40.272504", + "step": 5388, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.303831", + "step": 5388, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016733843367546797, + "timestamp": "2025-10-01 03:27:40.307746", + "step": 5389, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.338345", + "step": 5389, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012412193231284618, + "timestamp": "2025-10-01 03:27:40.341676", + "step": 5390, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.374703", + "step": 5390, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005233392585068941, + "timestamp": "2025-10-01 03:27:40.376989", + "step": 5391, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.414831", + "step": 5391, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047952422755770385, + "timestamp": "2025-10-01 03:27:40.438568", + "step": 5392, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:40.470851", + "step": 5392, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010596722131595016, + "timestamp": "2025-10-01 03:27:40.472961", + "step": 5393, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.507895", + "step": 5393, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006092976545915008, + "timestamp": "2025-10-01 03:27:40.510639", + "step": 5394, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.546557", + "step": 5394, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004010824952274561, + "timestamp": "2025-10-01 03:27:40.549979", + "step": 5395, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.587357", + "step": 5395, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013526393740903586, + "timestamp": "2025-10-01 03:27:40.611231", + "step": 5396, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.644676", + "step": 5396, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0051815807819366455, + "timestamp": "2025-10-01 03:27:40.646768", + "step": 5397, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.703096", + "step": 5397, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01819513365626335, + "timestamp": "2025-10-01 03:27:40.705148", + "step": 5398, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.737847", + "step": 5398, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004853521473705769, + "timestamp": "2025-10-01 03:27:40.742032", + "step": 5399, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.778584", + "step": 5399, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005126436008140445, + "timestamp": "2025-10-01 03:27:40.805206", + "step": 5400, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.839602", + "step": 5400, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03925076872110367, + "timestamp": "2025-10-01 03:27:40.842476", + "step": 5401, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:40.874167", + "step": 5401, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00760737294331193, + "timestamp": "2025-10-01 03:27:40.876279", + "step": 5402, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:40.916736", + "step": 5402, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006079916493035853, + "timestamp": "2025-10-01 03:27:40.919288", + "step": 5403, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:40.951541", + "step": 5403, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029053636826574802, + "timestamp": "2025-10-01 03:27:40.975385", + "step": 5404, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:41.020396", + "step": 5404, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001215574098750949, + "timestamp": "2025-10-01 03:27:41.022510", + "step": 5405, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.054869", + "step": 5405, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014858426293358207, + "timestamp": "2025-10-01 03:27:41.056942", + "step": 5406, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.088169", + "step": 5406, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031502849888056517, + "timestamp": "2025-10-01 03:27:41.090516", + "step": 5407, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.129044", + "step": 5407, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011625950719462708, + "timestamp": "2025-10-01 03:27:41.152662", + "step": 5408, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.183793", + "step": 5408, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002658706798683852, + "timestamp": "2025-10-01 03:27:41.186284", + "step": 5409, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.219580", + "step": 5409, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008147020707838237, + "timestamp": "2025-10-01 03:27:41.221970", + "step": 5410, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.254330", + "step": 5410, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007735795225016773, + "timestamp": "2025-10-01 03:27:41.256599", + "step": 5411, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.292466", + "step": 5411, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001487956615164876, + "timestamp": "2025-10-01 03:27:41.316273", + "step": 5412, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.348674", + "step": 5412, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007384677301160991, + "timestamp": "2025-10-01 03:27:41.350830", + "step": 5413, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:41.386603", + "step": 5413, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0632721409201622, + "timestamp": "2025-10-01 03:27:41.388866", + "step": 5414, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.425240", + "step": 5414, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003767327289097011, + "timestamp": "2025-10-01 03:27:41.427460", + "step": 5415, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.459982", + "step": 5415, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022982103109825402, + "timestamp": "2025-10-01 03:27:41.483683", + "step": 5416, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.515306", + "step": 5416, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0061509921215474606, + "timestamp": "2025-10-01 03:27:41.517635", + "step": 5417, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.557962", + "step": 5417, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004067087429575622, + "timestamp": "2025-10-01 03:27:41.560401", + "step": 5418, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.592610", + "step": 5418, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016548538114875555, + "timestamp": "2025-10-01 03:27:41.595096", + "step": 5419, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.635287", + "step": 5419, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035748028312809765, + "timestamp": "2025-10-01 03:27:41.658981", + "step": 5420, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:41.692890", + "step": 5420, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008070244221016765, + "timestamp": "2025-10-01 03:27:41.695210", + "step": 5421, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.733613", + "step": 5421, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010281304130330682, + "timestamp": "2025-10-01 03:27:41.735634", + "step": 5422, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.767155", + "step": 5422, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018928924691863358, + "timestamp": "2025-10-01 03:27:41.769331", + "step": 5423, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.814550", + "step": 5423, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001044439384713769, + "timestamp": "2025-10-01 03:27:41.838382", + "step": 5424, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.870000", + "step": 5424, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012786968727596104, + "timestamp": "2025-10-01 03:27:41.872879", + "step": 5425, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.910570", + "step": 5425, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036037247627973557, + "timestamp": "2025-10-01 03:27:41.913648", + "step": 5426, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.950749", + "step": 5426, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003174909157678485, + "timestamp": "2025-10-01 03:27:41.952960", + "step": 5427, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:41.983806", + "step": 5427, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013037150725722313, + "timestamp": "2025-10-01 03:27:42.007520", + "step": 5428, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.040606", + "step": 5428, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021488148195203394, + "timestamp": "2025-10-01 03:27:42.042743", + "step": 5429, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.075331", + "step": 5429, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047890524729155004, + "timestamp": "2025-10-01 03:27:42.077431", + "step": 5430, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:42.119270", + "step": 5430, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003244638501200825, + "timestamp": "2025-10-01 03:27:42.121643", + "step": 5431, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:42.153990", + "step": 5431, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008401813684031367, + "timestamp": "2025-10-01 03:27:42.177674", + "step": 5432, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.215576", + "step": 5432, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013786371564492583, + "timestamp": "2025-10-01 03:27:42.218094", + "step": 5433, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.249896", + "step": 5433, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018084047362208366, + "timestamp": "2025-10-01 03:27:42.252157", + "step": 5434, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:42.291163", + "step": 5434, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06968997418880463, + "timestamp": "2025-10-01 03:27:42.293384", + "step": 5435, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.324779", + "step": 5435, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002516261884011328, + "timestamp": "2025-10-01 03:27:42.349130", + "step": 5436, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:42.386125", + "step": 5436, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028860062593594193, + "timestamp": "2025-10-01 03:27:42.388538", + "step": 5437, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:42.420573", + "step": 5437, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010592704638838768, + "timestamp": "2025-10-01 03:27:42.422973", + "step": 5438, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.458308", + "step": 5438, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.403100269380957e-05, + "timestamp": "2025-10-01 03:27:42.460616", + "step": 5439, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.492886", + "step": 5439, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001068574856617488, + "timestamp": "2025-10-01 03:27:42.516483", + "step": 5440, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:42.548208", + "step": 5440, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001180425868369639, + "timestamp": "2025-10-01 03:27:42.550566", + "step": 5441, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.585995", + "step": 5441, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03301016986370087, + "timestamp": "2025-10-01 03:27:42.589036", + "step": 5442, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.620776", + "step": 5442, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006864985334686935, + "timestamp": "2025-10-01 03:27:42.623023", + "step": 5443, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.654708", + "step": 5443, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018195057287812233, + "timestamp": "2025-10-01 03:27:42.678400", + "step": 5444, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.726442", + "step": 5444, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017413195746485144, + "timestamp": "2025-10-01 03:27:42.728837", + "step": 5445, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.760534", + "step": 5445, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005515703815035522, + "timestamp": "2025-10-01 03:27:42.762633", + "step": 5446, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.807074", + "step": 5446, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044978881487622857, + "timestamp": "2025-10-01 03:27:42.810310", + "step": 5447, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.845020", + "step": 5447, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002784192096441984, + "timestamp": "2025-10-01 03:27:42.870989", + "step": 5448, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.905001", + "step": 5448, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032816329039633274, + "timestamp": "2025-10-01 03:27:42.910288", + "step": 5449, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.953527", + "step": 5449, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005694173276424408, + "timestamp": "2025-10-01 03:27:42.955653", + "step": 5450, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:42.994943", + "step": 5450, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014399508654605597, + "timestamp": "2025-10-01 03:27:42.997388", + "step": 5451, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.028648", + "step": 5451, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024958557332865894, + "timestamp": "2025-10-01 03:27:43.052463", + "step": 5452, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:43.094513", + "step": 5452, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001721510197967291, + "timestamp": "2025-10-01 03:27:43.097026", + "step": 5453, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.129638", + "step": 5453, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038959193625487387, + "timestamp": "2025-10-01 03:27:43.132014", + "step": 5454, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.164216", + "step": 5454, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02345411852002144, + "timestamp": "2025-10-01 03:27:43.171927", + "step": 5455, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.204737", + "step": 5455, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029202571022324264, + "timestamp": "2025-10-01 03:27:43.228741", + "step": 5456, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:43.271789", + "step": 5456, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046322113485075533, + "timestamp": "2025-10-01 03:27:43.273952", + "step": 5457, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.309047", + "step": 5457, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005733984871767461, + "timestamp": "2025-10-01 03:27:43.311284", + "step": 5458, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.344394", + "step": 5458, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007758277468383312, + "timestamp": "2025-10-01 03:27:43.346657", + "step": 5459, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.379238", + "step": 5459, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006361458799801767, + "timestamp": "2025-10-01 03:27:43.403054", + "step": 5460, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:27:43.442591", + "step": 5460, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020188656635582447, + "timestamp": "2025-10-01 03:27:43.450227", + "step": 5461, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.490372", + "step": 5461, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006199758499860764, + "timestamp": "2025-10-01 03:27:43.493148", + "step": 5462, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.524333", + "step": 5462, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003324436256662011, + "timestamp": "2025-10-01 03:27:43.526939", + "step": 5463, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.568775", + "step": 5463, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010834436397999525, + "timestamp": "2025-10-01 03:27:43.592926", + "step": 5464, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.626164", + "step": 5464, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015613343566656113, + "timestamp": "2025-10-01 03:27:43.628663", + "step": 5465, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.666209", + "step": 5465, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007395472843199968, + "timestamp": "2025-10-01 03:27:43.669030", + "step": 5466, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.701287", + "step": 5466, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020274579583201557, + "timestamp": "2025-10-01 03:27:43.703624", + "step": 5467, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:43.736777", + "step": 5467, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003534948220476508, + "timestamp": "2025-10-01 03:27:43.766573", + "step": 5468, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.805113", + "step": 5468, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001360071124508977, + "timestamp": "2025-10-01 03:27:43.807276", + "step": 5469, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:43.842956", + "step": 5469, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0039820983074605465, + "timestamp": "2025-10-01 03:27:43.845521", + "step": 5470, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.877126", + "step": 5470, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005576745606958866, + "timestamp": "2025-10-01 03:27:43.879390", + "step": 5471, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:43.910423", + "step": 5471, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009321296238340437, + "timestamp": "2025-10-01 03:27:43.934298", + "step": 5472, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:27:46.526247", + "step": 5472, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2698500.228851841, + "timestamp": "2025-10-01 03:27:46.528667", + "step": 5472, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:46.560122", + "step": 5472, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04140166565775871, + "timestamp": "2025-10-01 03:27:46.562238", + "step": 5473, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:46.594686", + "step": 5473, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030031811445951462, + "timestamp": "2025-10-01 03:27:46.597059", + "step": 5474, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:46.630162", + "step": 5474, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001198252648464404, + "timestamp": "2025-10-01 03:27:46.633444", + "step": 5475, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:46.665799", + "step": 5475, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0537257194519043, + "timestamp": "2025-10-01 03:27:46.690002", + "step": 5476, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:46.724905", + "step": 5476, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003347797319293022, + "timestamp": "2025-10-01 03:27:46.727377", + "step": 5477, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:46.759453", + "step": 5477, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005041165859438479, + "timestamp": "2025-10-01 03:27:46.761966", + "step": 5478, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:46.796408", + "step": 5478, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.366568413795903e-05, + "timestamp": "2025-10-01 03:27:46.798379", + "step": 5479, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:46.833642", + "step": 5479, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05405675992369652, + "timestamp": "2025-10-01 03:27:46.858455", + "step": 5480, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:46.896916", + "step": 5480, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008122880244627595, + "timestamp": "2025-10-01 03:27:46.899132", + "step": 5481, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:46.941158", + "step": 5481, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011284583015367389, + "timestamp": "2025-10-01 03:27:46.943509", + "step": 5482, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:46.979412", + "step": 5482, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014662604371551424, + "timestamp": "2025-10-01 03:27:46.981438", + "step": 5483, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.013789", + "step": 5483, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002244054339826107, + "timestamp": "2025-10-01 03:27:47.037688", + "step": 5484, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.070808", + "step": 5484, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021224978263489902, + "timestamp": "2025-10-01 03:27:47.073124", + "step": 5485, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.105395", + "step": 5485, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037852178793400526, + "timestamp": "2025-10-01 03:27:47.110141", + "step": 5486, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.145776", + "step": 5486, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003524689527694136, + "timestamp": "2025-10-01 03:27:47.147793", + "step": 5487, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.189151", + "step": 5487, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015217023610603064, + "timestamp": "2025-10-01 03:27:47.213264", + "step": 5488, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.246131", + "step": 5488, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002709569875150919, + "timestamp": "2025-10-01 03:27:47.248206", + "step": 5489, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.289441", + "step": 5489, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0233443733304739, + "timestamp": "2025-10-01 03:27:47.291949", + "step": 5490, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.326854", + "step": 5490, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03929053992033005, + "timestamp": "2025-10-01 03:27:47.329297", + "step": 5491, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.362753", + "step": 5491, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.033610451966524124, + "timestamp": "2025-10-01 03:27:47.386583", + "step": 5492, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:47.425131", + "step": 5492, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.658011938678101e-05, + "timestamp": "2025-10-01 03:27:47.427517", + "step": 5493, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:47.479498", + "step": 5493, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004222140123602003, + "timestamp": "2025-10-01 03:27:47.481834", + "step": 5494, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:47.514384", + "step": 5494, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019725805148482323, + "timestamp": "2025-10-01 03:27:47.516573", + "step": 5495, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.550129", + "step": 5495, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0398297943174839, + "timestamp": "2025-10-01 03:27:47.573899", + "step": 5496, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.607808", + "step": 5496, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002067637542495504, + "timestamp": "2025-10-01 03:27:47.610022", + "step": 5497, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.643570", + "step": 5497, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005681797047145665, + "timestamp": "2025-10-01 03:27:47.646384", + "step": 5498, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.677516", + "step": 5498, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024808457237668335, + "timestamp": "2025-10-01 03:27:47.680526", + "step": 5499, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:47.712501", + "step": 5499, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047532664029859006, + "timestamp": "2025-10-01 03:27:47.736353", + "step": 5500, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 5500", + "timestamp": "2025-10-01 03:27:52.478015", + "step": 5500, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.520229", + "step": 5500, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029123222338967025, + "timestamp": "2025-10-01 03:27:52.523101", + "step": 5501, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.556116", + "step": 5501, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009841850260272622, + "timestamp": "2025-10-01 03:27:52.559240", + "step": 5502, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.592661", + "step": 5502, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001583641511388123, + "timestamp": "2025-10-01 03:27:52.595913", + "step": 5503, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.628229", + "step": 5503, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027766809798777103, + "timestamp": "2025-10-01 03:27:52.652656", + "step": 5504, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.683869", + "step": 5504, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002935274678748101, + "timestamp": "2025-10-01 03:27:52.686875", + "step": 5505, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.720419", + "step": 5505, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007945424295030534, + "timestamp": "2025-10-01 03:27:52.723360", + "step": 5506, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.756681", + "step": 5506, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000266241462668404, + "timestamp": "2025-10-01 03:27:52.759547", + "step": 5507, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.793387", + "step": 5507, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005406553857028484, + "timestamp": "2025-10-01 03:27:52.817272", + "step": 5508, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.850034", + "step": 5508, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001384633214911446, + "timestamp": "2025-10-01 03:27:52.852167", + "step": 5509, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.887513", + "step": 5509, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016789177432656288, + "timestamp": "2025-10-01 03:27:52.898763", + "step": 5510, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:52.931666", + "step": 5510, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021120423916727304, + "timestamp": "2025-10-01 03:27:52.934469", + "step": 5511, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:52.981951", + "step": 5511, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002693373244255781, + "timestamp": "2025-10-01 03:27:53.006954", + "step": 5512, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.048437", + "step": 5512, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007930410210974514, + "timestamp": "2025-10-01 03:27:53.051060", + "step": 5513, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.092208", + "step": 5513, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005453981459140778, + "timestamp": "2025-10-01 03:27:53.095158", + "step": 5514, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.135784", + "step": 5514, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003674288745969534, + "timestamp": "2025-10-01 03:27:53.138528", + "step": 5515, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.171046", + "step": 5515, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005613579414784908, + "timestamp": "2025-10-01 03:27:53.195085", + "step": 5516, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.225665", + "step": 5516, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01636255718767643, + "timestamp": "2025-10-01 03:27:53.230247", + "step": 5517, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:53.269838", + "step": 5517, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003893979301210493, + "timestamp": "2025-10-01 03:27:53.273336", + "step": 5518, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.307786", + "step": 5518, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019020758336409926, + "timestamp": "2025-10-01 03:27:53.314760", + "step": 5519, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.349924", + "step": 5519, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004126966930925846, + "timestamp": "2025-10-01 03:27:53.373647", + "step": 5520, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:53.425181", + "step": 5520, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006178472540341318, + "timestamp": "2025-10-01 03:27:53.428921", + "step": 5521, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.477251", + "step": 5521, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013626531697809696, + "timestamp": "2025-10-01 03:27:53.480186", + "step": 5522, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.517900", + "step": 5522, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019263141439296305, + "timestamp": "2025-10-01 03:27:53.525371", + "step": 5523, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:53.565304", + "step": 5523, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000414087378885597, + "timestamp": "2025-10-01 03:27:53.589291", + "step": 5524, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.636948", + "step": 5524, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033491456997580826, + "timestamp": "2025-10-01 03:27:53.640038", + "step": 5525, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.675316", + "step": 5525, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002021233784034848, + "timestamp": "2025-10-01 03:27:53.678282", + "step": 5526, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.750737", + "step": 5526, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008315636776387691, + "timestamp": "2025-10-01 03:27:53.754887", + "step": 5527, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.796351", + "step": 5527, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035399540793150663, + "timestamp": "2025-10-01 03:27:53.823914", + "step": 5528, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.859936", + "step": 5528, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001636083354242146, + "timestamp": "2025-10-01 03:27:53.863904", + "step": 5529, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:53.907436", + "step": 5529, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014581714058294892, + "timestamp": "2025-10-01 03:27:53.909586", + "step": 5530, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.952826", + "step": 5530, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043243358959443867, + "timestamp": "2025-10-01 03:27:53.955144", + "step": 5531, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:53.987602", + "step": 5531, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005608889274299145, + "timestamp": "2025-10-01 03:27:54.011518", + "step": 5532, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.062671", + "step": 5532, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00349906412884593, + "timestamp": "2025-10-01 03:27:54.065754", + "step": 5533, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.106980", + "step": 5533, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01602839305996895, + "timestamp": "2025-10-01 03:27:54.109336", + "step": 5534, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.147311", + "step": 5534, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003281478537246585, + "timestamp": "2025-10-01 03:27:54.150538", + "step": 5535, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.189418", + "step": 5535, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013961560325697064, + "timestamp": "2025-10-01 03:27:54.213330", + "step": 5536, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.258043", + "step": 5536, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000569597352296114, + "timestamp": "2025-10-01 03:27:54.260305", + "step": 5537, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:54.305320", + "step": 5537, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013818147126585245, + "timestamp": "2025-10-01 03:27:54.308564", + "step": 5538, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.354886", + "step": 5538, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003820277052000165, + "timestamp": "2025-10-01 03:27:54.357255", + "step": 5539, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:54.392074", + "step": 5539, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013900151243433356, + "timestamp": "2025-10-01 03:27:54.415940", + "step": 5540, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.451237", + "step": 5540, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021801732946187258, + "timestamp": "2025-10-01 03:27:54.454482", + "step": 5541, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.490973", + "step": 5541, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031344080343842506, + "timestamp": "2025-10-01 03:27:54.493344", + "step": 5542, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.523495", + "step": 5542, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000450542924227193, + "timestamp": "2025-10-01 03:27:54.525809", + "step": 5543, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.561602", + "step": 5543, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044626041199080646, + "timestamp": "2025-10-01 03:27:54.585585", + "step": 5544, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.642420", + "step": 5544, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00482234125956893, + "timestamp": "2025-10-01 03:27:54.646181", + "step": 5545, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.692368", + "step": 5545, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002110153203830123, + "timestamp": "2025-10-01 03:27:54.702495", + "step": 5546, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.747987", + "step": 5546, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005678745801560581, + "timestamp": "2025-10-01 03:27:54.753945", + "step": 5547, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:54.789220", + "step": 5547, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022754153236746788, + "timestamp": "2025-10-01 03:27:54.816558", + "step": 5548, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.868551", + "step": 5548, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004017206374555826, + "timestamp": "2025-10-01 03:27:54.877787", + "step": 5549, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.939049", + "step": 5549, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010244447039440274, + "timestamp": "2025-10-01 03:27:54.941750", + "step": 5550, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:54.974795", + "step": 5550, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026954352855682373, + "timestamp": "2025-10-01 03:27:54.984271", + "step": 5551, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.040673", + "step": 5551, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004312192089855671, + "timestamp": "2025-10-01 03:27:55.069717", + "step": 5552, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.135716", + "step": 5552, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013057856122031808, + "timestamp": "2025-10-01 03:27:55.144599", + "step": 5553, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.183312", + "step": 5553, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029297824949026108, + "timestamp": "2025-10-01 03:27:55.185772", + "step": 5554, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.221160", + "step": 5554, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008569851634092629, + "timestamp": "2025-10-01 03:27:55.237244", + "step": 5555, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.276967", + "step": 5555, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013970501720905304, + "timestamp": "2025-10-01 03:27:55.302542", + "step": 5556, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.333572", + "step": 5556, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002732501830905676, + "timestamp": "2025-10-01 03:27:55.336285", + "step": 5557, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.367158", + "step": 5557, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009824135340750217, + "timestamp": "2025-10-01 03:27:55.369992", + "step": 5558, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.403588", + "step": 5558, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007138424552977085, + "timestamp": "2025-10-01 03:27:55.410905", + "step": 5559, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.444115", + "step": 5559, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024597938172519207, + "timestamp": "2025-10-01 03:27:55.474695", + "step": 5560, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.516241", + "step": 5560, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005760946660302579, + "timestamp": "2025-10-01 03:27:55.519171", + "step": 5561, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.555505", + "step": 5561, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034505054354667664, + "timestamp": "2025-10-01 03:27:55.563539", + "step": 5562, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.595368", + "step": 5562, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005733467405661941, + "timestamp": "2025-10-01 03:27:55.598748", + "step": 5563, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.649697", + "step": 5563, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017964119324460626, + "timestamp": "2025-10-01 03:27:55.675211", + "step": 5564, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.708388", + "step": 5564, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043019233271479607, + "timestamp": "2025-10-01 03:27:55.717674", + "step": 5565, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:55.756246", + "step": 5565, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010624075366649777, + "timestamp": "2025-10-01 03:27:55.768578", + "step": 5566, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.806899", + "step": 5566, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008346411399543285, + "timestamp": "2025-10-01 03:27:55.817276", + "step": 5567, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.848527", + "step": 5567, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006878994754515588, + "timestamp": "2025-10-01 03:27:55.875731", + "step": 5568, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.906341", + "step": 5568, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006042029126547277, + "timestamp": "2025-10-01 03:27:55.908858", + "step": 5569, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.939670", + "step": 5569, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024482738808728755, + "timestamp": "2025-10-01 03:27:55.941859", + "step": 5570, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:55.973714", + "step": 5570, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01838468201458454, + "timestamp": "2025-10-01 03:27:55.975949", + "step": 5571, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.015166", + "step": 5571, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030711875297129154, + "timestamp": "2025-10-01 03:27:56.039433", + "step": 5572, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.070323", + "step": 5572, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036249373806640506, + "timestamp": "2025-10-01 03:27:56.072381", + "step": 5573, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:56.103593", + "step": 5573, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014036375097930431, + "timestamp": "2025-10-01 03:27:56.105792", + "step": 5574, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:56.136240", + "step": 5574, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009561346843838692, + "timestamp": "2025-10-01 03:27:56.138576", + "step": 5575, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:56.169806", + "step": 5575, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.201424836646765e-05, + "timestamp": "2025-10-01 03:27:56.193661", + "step": 5576, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.224359", + "step": 5576, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001856417628005147, + "timestamp": "2025-10-01 03:27:56.227268", + "step": 5577, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.258639", + "step": 5577, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028643712867051363, + "timestamp": "2025-10-01 03:27:56.260894", + "step": 5578, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.291824", + "step": 5578, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039044287404976785, + "timestamp": "2025-10-01 03:27:56.294576", + "step": 5579, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.326086", + "step": 5579, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021988714579492807, + "timestamp": "2025-10-01 03:27:56.349734", + "step": 5580, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:56.380301", + "step": 5580, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031201550154946744, + "timestamp": "2025-10-01 03:27:56.382413", + "step": 5581, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.413239", + "step": 5581, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001427980896551162, + "timestamp": "2025-10-01 03:27:56.415402", + "step": 5582, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.446205", + "step": 5582, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009098982438445091, + "timestamp": "2025-10-01 03:27:56.451292", + "step": 5583, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.481934", + "step": 5583, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.702148625161499e-05, + "timestamp": "2025-10-01 03:27:56.505521", + "step": 5584, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.535966", + "step": 5584, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027910174685530365, + "timestamp": "2025-10-01 03:27:56.537998", + "step": 5585, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.568782", + "step": 5585, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025011293473653495, + "timestamp": "2025-10-01 03:27:56.570909", + "step": 5586, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:56.603657", + "step": 5586, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017893003532662988, + "timestamp": "2025-10-01 03:27:56.605775", + "step": 5587, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.636227", + "step": 5587, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011236850405111909, + "timestamp": "2025-10-01 03:27:56.660045", + "step": 5588, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.691086", + "step": 5588, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013117922469973564, + "timestamp": "2025-10-01 03:27:56.693277", + "step": 5589, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.724496", + "step": 5589, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019701993733178824, + "timestamp": "2025-10-01 03:27:56.726971", + "step": 5590, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.765968", + "step": 5590, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.6963115892140195e-05, + "timestamp": "2025-10-01 03:27:56.768753", + "step": 5591, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:56.801169", + "step": 5591, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007697258144617081, + "timestamp": "2025-10-01 03:27:56.825122", + "step": 5592, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:56.863976", + "step": 5592, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025057457969523966, + "timestamp": "2025-10-01 03:27:56.866569", + "step": 5593, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.899968", + "step": 5593, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017795959720388055, + "timestamp": "2025-10-01 03:27:56.902450", + "step": 5594, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:56.933552", + "step": 5594, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001358592271571979, + "timestamp": "2025-10-01 03:27:56.935670", + "step": 5595, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:56.966430", + "step": 5595, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022029659885447472, + "timestamp": "2025-10-01 03:27:56.990286", + "step": 5596, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.021446", + "step": 5596, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.039066935656592e-05, + "timestamp": "2025-10-01 03:27:57.025474", + "step": 5597, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.057383", + "step": 5597, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00857391208410263, + "timestamp": "2025-10-01 03:27:57.060680", + "step": 5598, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.091529", + "step": 5598, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003585380967706442, + "timestamp": "2025-10-01 03:27:57.093752", + "step": 5599, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.124122", + "step": 5599, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.454171620542184e-05, + "timestamp": "2025-10-01 03:27:57.147991", + "step": 5600, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.180060", + "step": 5600, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00816528219729662, + "timestamp": "2025-10-01 03:27:57.182532", + "step": 5601, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.214105", + "step": 5601, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002739075571298599, + "timestamp": "2025-10-01 03:27:57.216578", + "step": 5602, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.247629", + "step": 5602, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.368274186272174e-05, + "timestamp": "2025-10-01 03:27:57.250060", + "step": 5603, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.280629", + "step": 5603, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035686427145265043, + "timestamp": "2025-10-01 03:27:57.305561", + "step": 5604, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.337504", + "step": 5604, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014549707702826709, + "timestamp": "2025-10-01 03:27:57.339569", + "step": 5605, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.370364", + "step": 5605, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006289519951678813, + "timestamp": "2025-10-01 03:27:57.372757", + "step": 5606, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.403721", + "step": 5606, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01378419529646635, + "timestamp": "2025-10-01 03:27:57.406060", + "step": 5607, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.437540", + "step": 5607, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005234855692833662, + "timestamp": "2025-10-01 03:27:57.461021", + "step": 5608, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.492161", + "step": 5608, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009391118655912578, + "timestamp": "2025-10-01 03:27:57.494179", + "step": 5609, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:27:57.525604", + "step": 5609, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004482906952034682, + "timestamp": "2025-10-01 03:27:57.527630", + "step": 5610, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.558179", + "step": 5610, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002548294432926923, + "timestamp": "2025-10-01 03:27:57.560328", + "step": 5611, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.591987", + "step": 5611, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.435094787273556e-05, + "timestamp": "2025-10-01 03:27:57.615948", + "step": 5612, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:57.646565", + "step": 5612, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014698781888000667, + "timestamp": "2025-10-01 03:27:57.648788", + "step": 5613, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.679034", + "step": 5613, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011605177132878453, + "timestamp": "2025-10-01 03:27:57.681252", + "step": 5614, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.712351", + "step": 5614, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012461826554499567, + "timestamp": "2025-10-01 03:27:57.714606", + "step": 5615, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.746058", + "step": 5615, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005943202413618565, + "timestamp": "2025-10-01 03:27:57.769946", + "step": 5616, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.801484", + "step": 5616, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.023700887337327003, + "timestamp": "2025-10-01 03:27:57.804835", + "step": 5617, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.844775", + "step": 5617, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017044378910213709, + "timestamp": "2025-10-01 03:27:57.847683", + "step": 5618, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.891197", + "step": 5618, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010905083763645962, + "timestamp": "2025-10-01 03:27:57.896264", + "step": 5619, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.937195", + "step": 5619, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.07912171632051468, + "timestamp": "2025-10-01 03:27:57.961257", + "step": 5620, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:57.999510", + "step": 5620, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000359882804332301, + "timestamp": "2025-10-01 03:27:58.001813", + "step": 5621, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:27:58.034594", + "step": 5621, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002592350821942091, + "timestamp": "2025-10-01 03:27:58.037644", + "step": 5622, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:58.073210", + "step": 5622, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04364144802093506, + "timestamp": "2025-10-01 03:27:58.081192", + "step": 5623, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:27:58.120914", + "step": 5623, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033392634941264987, + "timestamp": "2025-10-01 03:27:58.147362", + "step": 5624, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:28:00.608119", + "step": 5624, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2893526.6842548596, + "timestamp": "2025-10-01 03:28:00.613298", + "step": 5624, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.644356", + "step": 5624, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013616048090625554, + "timestamp": "2025-10-01 03:28:00.646549", + "step": 5625, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.679308", + "step": 5625, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.27108121686615e-05, + "timestamp": "2025-10-01 03:28:00.682638", + "step": 5626, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.715330", + "step": 5626, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013818129664286971, + "timestamp": "2025-10-01 03:28:00.717458", + "step": 5627, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.748915", + "step": 5627, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.68231218191795e-05, + "timestamp": "2025-10-01 03:28:00.773502", + "step": 5628, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.807747", + "step": 5628, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021978768054395914, + "timestamp": "2025-10-01 03:28:00.810485", + "step": 5629, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.844998", + "step": 5629, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02121070958673954, + "timestamp": "2025-10-01 03:28:00.847379", + "step": 5630, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.878477", + "step": 5630, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011044740676879883, + "timestamp": "2025-10-01 03:28:00.881002", + "step": 5631, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.912605", + "step": 5631, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00405313353985548, + "timestamp": "2025-10-01 03:28:00.938343", + "step": 5632, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:00.973192", + "step": 5632, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013163765834178776, + "timestamp": "2025-10-01 03:28:00.975343", + "step": 5633, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.007053", + "step": 5633, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014203808677848428, + "timestamp": "2025-10-01 03:28:01.009271", + "step": 5634, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:01.041515", + "step": 5634, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008655980345793068, + "timestamp": "2025-10-01 03:28:01.045471", + "step": 5635, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.082219", + "step": 5635, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012080914893886074, + "timestamp": "2025-10-01 03:28:01.105735", + "step": 5636, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.139312", + "step": 5636, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007558437646366656, + "timestamp": "2025-10-01 03:28:01.142088", + "step": 5637, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.173511", + "step": 5637, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016181438695639372, + "timestamp": "2025-10-01 03:28:01.177329", + "step": 5638, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.209267", + "step": 5638, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.848544530337676e-05, + "timestamp": "2025-10-01 03:28:01.212159", + "step": 5639, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.243432", + "step": 5639, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019589069415815175, + "timestamp": "2025-10-01 03:28:01.268051", + "step": 5640, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.300180", + "step": 5640, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029508047737181187, + "timestamp": "2025-10-01 03:28:01.302962", + "step": 5641, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.334183", + "step": 5641, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.324176451424137e-05, + "timestamp": "2025-10-01 03:28:01.336893", + "step": 5642, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.368769", + "step": 5642, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.96227400819771e-05, + "timestamp": "2025-10-01 03:28:01.370769", + "step": 5643, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.403020", + "step": 5643, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023055239580571651, + "timestamp": "2025-10-01 03:28:01.427370", + "step": 5644, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.460361", + "step": 5644, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.590208456851542e-05, + "timestamp": "2025-10-01 03:28:01.463061", + "step": 5645, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.494693", + "step": 5645, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001145071946666576, + "timestamp": "2025-10-01 03:28:01.497634", + "step": 5646, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.532300", + "step": 5646, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004140802193433046, + "timestamp": "2025-10-01 03:28:01.535247", + "step": 5647, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.568681", + "step": 5647, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001353077095700428, + "timestamp": "2025-10-01 03:28:01.592629", + "step": 5648, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.623254", + "step": 5648, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002760143543127924, + "timestamp": "2025-10-01 03:28:01.626201", + "step": 5649, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:01.657656", + "step": 5649, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025144690880551934, + "timestamp": "2025-10-01 03:28:01.660318", + "step": 5650, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.690843", + "step": 5650, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.058282412588596344, + "timestamp": "2025-10-01 03:28:01.694021", + "step": 5651, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.728041", + "step": 5651, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003187786787748337, + "timestamp": "2025-10-01 03:28:01.753568", + "step": 5652, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.784919", + "step": 5652, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036886826273985207, + "timestamp": "2025-10-01 03:28:01.787094", + "step": 5653, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:01.822695", + "step": 5653, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001669600314926356, + "timestamp": "2025-10-01 03:28:01.826930", + "step": 5654, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.857946", + "step": 5654, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001828869804739952, + "timestamp": "2025-10-01 03:28:01.860656", + "step": 5655, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.891858", + "step": 5655, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.169908349169418e-05, + "timestamp": "2025-10-01 03:28:01.916567", + "step": 5656, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.948410", + "step": 5656, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.935830606380478e-05, + "timestamp": "2025-10-01 03:28:01.951183", + "step": 5657, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:01.983158", + "step": 5657, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002396703202975914, + "timestamp": "2025-10-01 03:28:01.986068", + "step": 5658, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.021753", + "step": 5658, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003185216337442398, + "timestamp": "2025-10-01 03:28:02.024891", + "step": 5659, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.056809", + "step": 5659, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001928676647366956, + "timestamp": "2025-10-01 03:28:02.081333", + "step": 5660, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.113166", + "step": 5660, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002873233985155821, + "timestamp": "2025-10-01 03:28:02.116187", + "step": 5661, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.148319", + "step": 5661, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013382912147790194, + "timestamp": "2025-10-01 03:28:02.151514", + "step": 5662, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.183969", + "step": 5662, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005584682221524417, + "timestamp": "2025-10-01 03:28:02.186871", + "step": 5663, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:02.220000", + "step": 5663, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027637058519758284, + "timestamp": "2025-10-01 03:28:02.244506", + "step": 5664, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:02.277111", + "step": 5664, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004046032845508307, + "timestamp": "2025-10-01 03:28:02.280321", + "step": 5665, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.312955", + "step": 5665, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038995977956801653, + "timestamp": "2025-10-01 03:28:02.320358", + "step": 5666, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.351421", + "step": 5666, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024226710957009345, + "timestamp": "2025-10-01 03:28:02.354258", + "step": 5667, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.386049", + "step": 5667, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014776134048588574, + "timestamp": "2025-10-01 03:28:02.410429", + "step": 5668, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:02.441362", + "step": 5668, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024389968893956393, + "timestamp": "2025-10-01 03:28:02.443600", + "step": 5669, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.474526", + "step": 5669, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048359655193053186, + "timestamp": "2025-10-01 03:28:02.476555", + "step": 5670, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:02.507282", + "step": 5670, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003214406024198979, + "timestamp": "2025-10-01 03:28:02.509518", + "step": 5671, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.540403", + "step": 5671, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027684002998284996, + "timestamp": "2025-10-01 03:28:02.564551", + "step": 5672, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.608320", + "step": 5672, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.48477754718624e-05, + "timestamp": "2025-10-01 03:28:02.611571", + "step": 5673, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.644699", + "step": 5673, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013504890375770628, + "timestamp": "2025-10-01 03:28:02.646922", + "step": 5674, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:02.677158", + "step": 5674, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004080976825207472, + "timestamp": "2025-10-01 03:28:02.679258", + "step": 5675, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.710291", + "step": 5675, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032469985308125615, + "timestamp": "2025-10-01 03:28:02.734074", + "step": 5676, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.765072", + "step": 5676, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016132988093886524, + "timestamp": "2025-10-01 03:28:02.767232", + "step": 5677, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:02.804224", + "step": 5677, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0671040490269661, + "timestamp": "2025-10-01 03:28:02.806390", + "step": 5678, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.836858", + "step": 5678, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024388408928643912, + "timestamp": "2025-10-01 03:28:02.839543", + "step": 5679, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.874773", + "step": 5679, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001350019738310948, + "timestamp": "2025-10-01 03:28:02.898856", + "step": 5680, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.930074", + "step": 5680, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01065539289265871, + "timestamp": "2025-10-01 03:28:02.932171", + "step": 5681, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.962390", + "step": 5681, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.355080692330375e-05, + "timestamp": "2025-10-01 03:28:02.966136", + "step": 5682, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:02.996662", + "step": 5682, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013342328020371497, + "timestamp": "2025-10-01 03:28:02.998854", + "step": 5683, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.029692", + "step": 5683, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007681422284804285, + "timestamp": "2025-10-01 03:28:03.053539", + "step": 5684, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.083817", + "step": 5684, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002037384081631899, + "timestamp": "2025-10-01 03:28:03.086563", + "step": 5685, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.118075", + "step": 5685, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013483728980645537, + "timestamp": "2025-10-01 03:28:03.120104", + "step": 5686, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.157526", + "step": 5686, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011441092647146434, + "timestamp": "2025-10-01 03:28:03.162156", + "step": 5687, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:03.206450", + "step": 5687, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011906906001968309, + "timestamp": "2025-10-01 03:28:03.245554", + "step": 5688, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.290892", + "step": 5688, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012710922164842486, + "timestamp": "2025-10-01 03:28:03.297640", + "step": 5689, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.333332", + "step": 5689, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006585153751075268, + "timestamp": "2025-10-01 03:28:03.342100", + "step": 5690, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.379172", + "step": 5690, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.740102667710744e-05, + "timestamp": "2025-10-01 03:28:03.386882", + "step": 5691, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.419515", + "step": 5691, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005236473516561091, + "timestamp": "2025-10-01 03:28:03.448342", + "step": 5692, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.482210", + "step": 5692, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006623975932598114, + "timestamp": "2025-10-01 03:28:03.487385", + "step": 5693, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:03.524876", + "step": 5693, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010808611987158656, + "timestamp": "2025-10-01 03:28:03.533713", + "step": 5694, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.566336", + "step": 5694, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.1686867664102465e-05, + "timestamp": "2025-10-01 03:28:03.572480", + "step": 5695, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.611921", + "step": 5695, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016306756879203022, + "timestamp": "2025-10-01 03:28:03.642295", + "step": 5696, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.676325", + "step": 5696, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018860114738345146, + "timestamp": "2025-10-01 03:28:03.680797", + "step": 5697, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:03.720895", + "step": 5697, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010646565351635218, + "timestamp": "2025-10-01 03:28:03.722939", + "step": 5698, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.753454", + "step": 5698, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004627132322639227, + "timestamp": "2025-10-01 03:28:03.761179", + "step": 5699, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:03.800943", + "step": 5699, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011188693315489218, + "timestamp": "2025-10-01 03:28:03.830046", + "step": 5700, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:03.866046", + "step": 5700, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008120899088680744, + "timestamp": "2025-10-01 03:28:03.874889", + "step": 5701, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.909957", + "step": 5701, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007331055589020252, + "timestamp": "2025-10-01 03:28:03.920381", + "step": 5702, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:03.961790", + "step": 5702, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.08500132709741592, + "timestamp": "2025-10-01 03:28:03.969043", + "step": 5703, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:04.001202", + "step": 5703, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030161014292389154, + "timestamp": "2025-10-01 03:28:04.030304", + "step": 5704, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.065054", + "step": 5704, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.052815306931734085, + "timestamp": "2025-10-01 03:28:04.070419", + "step": 5705, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.107248", + "step": 5705, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.556869190419093e-05, + "timestamp": "2025-10-01 03:28:04.109655", + "step": 5706, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.146893", + "step": 5706, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005940374103374779, + "timestamp": "2025-10-01 03:28:04.158923", + "step": 5707, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.202962", + "step": 5707, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030023479484952986, + "timestamp": "2025-10-01 03:28:04.237934", + "step": 5708, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.276975", + "step": 5708, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007610931061208248, + "timestamp": "2025-10-01 03:28:04.286362", + "step": 5709, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.324831", + "step": 5709, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011214111931622028, + "timestamp": "2025-10-01 03:28:04.328599", + "step": 5710, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:04.367660", + "step": 5710, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018775586795527488, + "timestamp": "2025-10-01 03:28:04.376925", + "step": 5711, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.416654", + "step": 5711, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002985867322422564, + "timestamp": "2025-10-01 03:28:04.442551", + "step": 5712, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.480375", + "step": 5712, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02081427536904812, + "timestamp": "2025-10-01 03:28:04.496034", + "step": 5713, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.538561", + "step": 5713, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020296950242482126, + "timestamp": "2025-10-01 03:28:04.555770", + "step": 5714, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.601073", + "step": 5714, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.113676060223952e-05, + "timestamp": "2025-10-01 03:28:04.616772", + "step": 5715, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.656640", + "step": 5715, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014313396532088518, + "timestamp": "2025-10-01 03:28:04.690157", + "step": 5716, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:28:04.732510", + "step": 5716, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008713230490684509, + "timestamp": "2025-10-01 03:28:04.736582", + "step": 5717, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.776459", + "step": 5717, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002459574898239225, + "timestamp": "2025-10-01 03:28:04.793186", + "step": 5718, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.844662", + "step": 5718, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009186535142362118, + "timestamp": "2025-10-01 03:28:04.864511", + "step": 5719, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:04.915139", + "step": 5719, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003357954847160727, + "timestamp": "2025-10-01 03:28:04.956911", + "step": 5720, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.021115", + "step": 5720, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.24742965050973e-05, + "timestamp": "2025-10-01 03:28:05.040657", + "step": 5721, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.095655", + "step": 5721, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033869053004309535, + "timestamp": "2025-10-01 03:28:05.100092", + "step": 5722, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.134071", + "step": 5722, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010028514952864498, + "timestamp": "2025-10-01 03:28:05.142341", + "step": 5723, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.185043", + "step": 5723, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004118992481380701, + "timestamp": "2025-10-01 03:28:05.214971", + "step": 5724, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:05.252025", + "step": 5724, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030613407492637634, + "timestamp": "2025-10-01 03:28:05.266983", + "step": 5725, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.305977", + "step": 5725, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010362658213125542, + "timestamp": "2025-10-01 03:28:05.325931", + "step": 5726, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.384615", + "step": 5726, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001736323582008481, + "timestamp": "2025-10-01 03:28:05.405766", + "step": 5727, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.440886", + "step": 5727, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000493147992528975, + "timestamp": "2025-10-01 03:28:05.476393", + "step": 5728, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:05.509701", + "step": 5728, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019519347697496414, + "timestamp": "2025-10-01 03:28:05.532389", + "step": 5729, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.574732", + "step": 5729, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004518554487731308, + "timestamp": "2025-10-01 03:28:05.577419", + "step": 5730, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.609145", + "step": 5730, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032352786511182785, + "timestamp": "2025-10-01 03:28:05.613833", + "step": 5731, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:05.647083", + "step": 5731, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028310196939855814, + "timestamp": "2025-10-01 03:28:05.671970", + "step": 5732, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.702855", + "step": 5732, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015837670071050525, + "timestamp": "2025-10-01 03:28:05.705256", + "step": 5733, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.737589", + "step": 5733, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011643161997199059, + "timestamp": "2025-10-01 03:28:05.740869", + "step": 5734, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:05.773494", + "step": 5734, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0073793441988527775, + "timestamp": "2025-10-01 03:28:05.775947", + "step": 5735, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.808380", + "step": 5735, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006480951560661197, + "timestamp": "2025-10-01 03:28:05.834942", + "step": 5736, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.869004", + "step": 5736, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015333750343415886, + "timestamp": "2025-10-01 03:28:05.874714", + "step": 5737, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.909019", + "step": 5737, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008742661448195577, + "timestamp": "2025-10-01 03:28:05.916287", + "step": 5738, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.947543", + "step": 5738, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008463295176625252, + "timestamp": "2025-10-01 03:28:05.954656", + "step": 5739, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:05.990134", + "step": 5739, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010262498399242759, + "timestamp": "2025-10-01 03:28:06.020365", + "step": 5740, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.054495", + "step": 5740, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.038327980786561966, + "timestamp": "2025-10-01 03:28:06.061145", + "step": 5741, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.106582", + "step": 5741, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007682341150939465, + "timestamp": "2025-10-01 03:28:06.111353", + "step": 5742, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.154391", + "step": 5742, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00119018426630646, + "timestamp": "2025-10-01 03:28:06.156894", + "step": 5743, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.189878", + "step": 5743, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019733519002329558, + "timestamp": "2025-10-01 03:28:06.219155", + "step": 5744, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:06.252226", + "step": 5744, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002863869594875723, + "timestamp": "2025-10-01 03:28:06.257871", + "step": 5745, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.308351", + "step": 5745, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025705076404847205, + "timestamp": "2025-10-01 03:28:06.310592", + "step": 5746, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.342566", + "step": 5746, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011508940951898694, + "timestamp": "2025-10-01 03:28:06.344582", + "step": 5747, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.376608", + "step": 5747, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012074584374204278, + "timestamp": "2025-10-01 03:28:06.400203", + "step": 5748, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:06.432604", + "step": 5748, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06494910269975662, + "timestamp": "2025-10-01 03:28:06.435041", + "step": 5749, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.466901", + "step": 5749, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001626321318326518, + "timestamp": "2025-10-01 03:28:06.469049", + "step": 5750, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.501076", + "step": 5750, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00955283548682928, + "timestamp": "2025-10-01 03:28:06.503137", + "step": 5751, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.549576", + "step": 5751, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004528920166194439, + "timestamp": "2025-10-01 03:28:06.573121", + "step": 5752, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.608691", + "step": 5752, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003610695246607065, + "timestamp": "2025-10-01 03:28:06.611001", + "step": 5753, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.641732", + "step": 5753, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010862050112336874, + "timestamp": "2025-10-01 03:28:06.643871", + "step": 5754, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.675153", + "step": 5754, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001913379819598049, + "timestamp": "2025-10-01 03:28:06.680275", + "step": 5755, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.716274", + "step": 5755, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017628383648116142, + "timestamp": "2025-10-01 03:28:06.739986", + "step": 5756, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.770579", + "step": 5756, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010993791511282325, + "timestamp": "2025-10-01 03:28:06.772731", + "step": 5757, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.804737", + "step": 5757, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003485390043351799, + "timestamp": "2025-10-01 03:28:06.806960", + "step": 5758, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.841067", + "step": 5758, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009954291395843029, + "timestamp": "2025-10-01 03:28:06.843644", + "step": 5759, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.879147", + "step": 5759, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.10154996812343597, + "timestamp": "2025-10-01 03:28:06.902917", + "step": 5760, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.933825", + "step": 5760, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002082876628264785, + "timestamp": "2025-10-01 03:28:06.936237", + "step": 5761, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:06.973411", + "step": 5761, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040776978130452335, + "timestamp": "2025-10-01 03:28:06.976208", + "step": 5762, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-10-01 03:28:07.017547", + "step": 5762, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013465952361002564, + "timestamp": "2025-10-01 03:28:07.019806", + "step": 5763, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.050716", + "step": 5763, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004562152607832104, + "timestamp": "2025-10-01 03:28:07.074274", + "step": 5764, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.108843", + "step": 5764, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017812704027164727, + "timestamp": "2025-10-01 03:28:07.110915", + "step": 5765, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.142323", + "step": 5765, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003493714320939034, + "timestamp": "2025-10-01 03:28:07.144542", + "step": 5766, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.176637", + "step": 5766, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030109375715255737, + "timestamp": "2025-10-01 03:28:07.178894", + "step": 5767, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.211735", + "step": 5767, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028339424170553684, + "timestamp": "2025-10-01 03:28:07.235414", + "step": 5768, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.267096", + "step": 5768, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004576197825372219, + "timestamp": "2025-10-01 03:28:07.269248", + "step": 5769, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:07.300804", + "step": 5769, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019199613481760025, + "timestamp": "2025-10-01 03:28:07.302889", + "step": 5770, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.336596", + "step": 5770, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05093200132250786, + "timestamp": "2025-10-01 03:28:07.338812", + "step": 5771, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.369537", + "step": 5771, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015998129965737462, + "timestamp": "2025-10-01 03:28:07.393444", + "step": 5772, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.430821", + "step": 5772, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012648636475205421, + "timestamp": "2025-10-01 03:28:07.434571", + "step": 5773, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.467028", + "step": 5773, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005021379329264164, + "timestamp": "2025-10-01 03:28:07.469277", + "step": 5774, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.505109", + "step": 5774, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002472309162840247, + "timestamp": "2025-10-01 03:28:07.507455", + "step": 5775, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:07.539879", + "step": 5775, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036743548116646707, + "timestamp": "2025-10-01 03:28:07.563524", + "step": 5776, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:28:10.075965", + "step": 5776, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2370993.233287946, + "timestamp": "2025-10-01 03:28:10.079255", + "step": 5776, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.109058", + "step": 5776, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014029623940587044, + "timestamp": "2025-10-01 03:28:10.111922", + "step": 5777, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.143865", + "step": 5777, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017630709044169635, + "timestamp": "2025-10-01 03:28:10.148321", + "step": 5778, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.191006", + "step": 5778, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027850226615555584, + "timestamp": "2025-10-01 03:28:10.193741", + "step": 5779, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.231307", + "step": 5779, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007746168412268162, + "timestamp": "2025-10-01 03:28:10.256064", + "step": 5780, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.288363", + "step": 5780, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010597476735711098, + "timestamp": "2025-10-01 03:28:10.291200", + "step": 5781, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.322889", + "step": 5781, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007827702793292701, + "timestamp": "2025-10-01 03:28:10.326672", + "step": 5782, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:10.358080", + "step": 5782, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008948014001362026, + "timestamp": "2025-10-01 03:28:10.361012", + "step": 5783, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.393260", + "step": 5783, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004833804559893906, + "timestamp": "2025-10-01 03:28:10.417890", + "step": 5784, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.449779", + "step": 5784, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01795135624706745, + "timestamp": "2025-10-01 03:28:10.452377", + "step": 5785, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.482966", + "step": 5785, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017560316249728203, + "timestamp": "2025-10-01 03:28:10.486869", + "step": 5786, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:10.519323", + "step": 5786, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01891818456351757, + "timestamp": "2025-10-01 03:28:10.522116", + "step": 5787, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:10.553788", + "step": 5787, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001034446875564754, + "timestamp": "2025-10-01 03:28:10.578407", + "step": 5788, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.609496", + "step": 5788, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0050679841078817844, + "timestamp": "2025-10-01 03:28:10.611715", + "step": 5789, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.649862", + "step": 5789, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001714498968794942, + "timestamp": "2025-10-01 03:28:10.653350", + "step": 5790, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.683888", + "step": 5790, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022579357028007507, + "timestamp": "2025-10-01 03:28:10.686106", + "step": 5791, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.716577", + "step": 5791, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016432907432317734, + "timestamp": "2025-10-01 03:28:10.740234", + "step": 5792, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.773163", + "step": 5792, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001406156807206571, + "timestamp": "2025-10-01 03:28:10.775337", + "step": 5793, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.808652", + "step": 5793, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003012751927599311, + "timestamp": "2025-10-01 03:28:10.810989", + "step": 5794, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.842021", + "step": 5794, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011471209581941366, + "timestamp": "2025-10-01 03:28:10.845053", + "step": 5795, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:10.891603", + "step": 5795, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013491293648257852, + "timestamp": "2025-10-01 03:28:10.915421", + "step": 5796, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:10.946368", + "step": 5796, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007097940542735159, + "timestamp": "2025-10-01 03:28:10.948981", + "step": 5797, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:10.979778", + "step": 5797, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003532333066686988, + "timestamp": "2025-10-01 03:28:10.981959", + "step": 5798, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.012836", + "step": 5798, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010055721504613757, + "timestamp": "2025-10-01 03:28:11.015029", + "step": 5799, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.045474", + "step": 5799, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008468144806101918, + "timestamp": "2025-10-01 03:28:11.069172", + "step": 5800, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.100683", + "step": 5800, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020432379096746445, + "timestamp": "2025-10-01 03:28:11.102877", + "step": 5801, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.135003", + "step": 5801, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011745716445147991, + "timestamp": "2025-10-01 03:28:11.137223", + "step": 5802, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.167943", + "step": 5802, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004787014040630311, + "timestamp": "2025-10-01 03:28:11.170131", + "step": 5803, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.201614", + "step": 5803, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013960620388388634, + "timestamp": "2025-10-01 03:28:11.226634", + "step": 5804, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.265871", + "step": 5804, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006126130931079388, + "timestamp": "2025-10-01 03:28:11.268078", + "step": 5805, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.309928", + "step": 5805, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013550490140914917, + "timestamp": "2025-10-01 03:28:11.312451", + "step": 5806, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.356680", + "step": 5806, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02178989350795746, + "timestamp": "2025-10-01 03:28:11.360082", + "step": 5807, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.391646", + "step": 5807, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001976788742467761, + "timestamp": "2025-10-01 03:28:11.423268", + "step": 5808, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.474819", + "step": 5808, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006946742068976164, + "timestamp": "2025-10-01 03:28:11.477384", + "step": 5809, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.512702", + "step": 5809, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03970056027173996, + "timestamp": "2025-10-01 03:28:11.514865", + "step": 5810, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.548557", + "step": 5810, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002297234022989869, + "timestamp": "2025-10-01 03:28:11.550581", + "step": 5811, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.581746", + "step": 5811, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031965873204171658, + "timestamp": "2025-10-01 03:28:11.605568", + "step": 5812, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.636343", + "step": 5812, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020522300619632006, + "timestamp": "2025-10-01 03:28:11.638477", + "step": 5813, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.669969", + "step": 5813, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010176433715969324, + "timestamp": "2025-10-01 03:28:11.672618", + "step": 5814, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.709185", + "step": 5814, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004019808489829302, + "timestamp": "2025-10-01 03:28:11.713762", + "step": 5815, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.759853", + "step": 5815, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014645671471953392, + "timestamp": "2025-10-01 03:28:11.783584", + "step": 5816, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:11.814856", + "step": 5816, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009161676862277091, + "timestamp": "2025-10-01 03:28:11.817079", + "step": 5817, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:11.849211", + "step": 5817, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020326648373156786, + "timestamp": "2025-10-01 03:28:11.851433", + "step": 5818, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:11.883108", + "step": 5818, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010812477208673954, + "timestamp": "2025-10-01 03:28:11.885285", + "step": 5819, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:11.917588", + "step": 5819, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020037225913256407, + "timestamp": "2025-10-01 03:28:11.941553", + "step": 5820, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:11.974175", + "step": 5820, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014621511101722717, + "timestamp": "2025-10-01 03:28:11.976383", + "step": 5821, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.010026", + "step": 5821, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005938555113971233, + "timestamp": "2025-10-01 03:28:12.012776", + "step": 5822, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.045377", + "step": 5822, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.048370908945798874, + "timestamp": "2025-10-01 03:28:12.047727", + "step": 5823, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:12.079869", + "step": 5823, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006025636103004217, + "timestamp": "2025-10-01 03:28:12.103550", + "step": 5824, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.136543", + "step": 5824, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0048393248580396175, + "timestamp": "2025-10-01 03:28:12.138914", + "step": 5825, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.171241", + "step": 5825, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012126759393140674, + "timestamp": "2025-10-01 03:28:12.173513", + "step": 5826, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.206372", + "step": 5826, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007850335910916328, + "timestamp": "2025-10-01 03:28:12.208851", + "step": 5827, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.240314", + "step": 5827, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007022529025562108, + "timestamp": "2025-10-01 03:28:12.264469", + "step": 5828, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.295220", + "step": 5828, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036963445600122213, + "timestamp": "2025-10-01 03:28:12.297408", + "step": 5829, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.328939", + "step": 5829, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008360156789422035, + "timestamp": "2025-10-01 03:28:12.331251", + "step": 5830, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.362154", + "step": 5830, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008900194079615176, + "timestamp": "2025-10-01 03:28:12.364143", + "step": 5831, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.395245", + "step": 5831, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015069050714373589, + "timestamp": "2025-10-01 03:28:12.418939", + "step": 5832, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:28:12.450373", + "step": 5832, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005068556405603886, + "timestamp": "2025-10-01 03:28:12.452922", + "step": 5833, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.484071", + "step": 5833, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012780667748302221, + "timestamp": "2025-10-01 03:28:12.486449", + "step": 5834, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.516939", + "step": 5834, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007611875189468265, + "timestamp": "2025-10-01 03:28:12.519102", + "step": 5835, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.549622", + "step": 5835, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025854669511318207, + "timestamp": "2025-10-01 03:28:12.573475", + "step": 5836, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.604903", + "step": 5836, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015054686926305294, + "timestamp": "2025-10-01 03:28:12.606999", + "step": 5837, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.637552", + "step": 5837, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024045363534241915, + "timestamp": "2025-10-01 03:28:12.639811", + "step": 5838, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:12.672625", + "step": 5838, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007742919842712581, + "timestamp": "2025-10-01 03:28:12.674716", + "step": 5839, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.706433", + "step": 5839, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009995250729843974, + "timestamp": "2025-10-01 03:28:12.730874", + "step": 5840, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.761931", + "step": 5840, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004795674234628677, + "timestamp": "2025-10-01 03:28:12.763976", + "step": 5841, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.795116", + "step": 5841, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008355500176548958, + "timestamp": "2025-10-01 03:28:12.797722", + "step": 5842, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.828450", + "step": 5842, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009048302890732884, + "timestamp": "2025-10-01 03:28:12.830689", + "step": 5843, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:12.861896", + "step": 5843, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015026286710053682, + "timestamp": "2025-10-01 03:28:12.885656", + "step": 5844, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.920498", + "step": 5844, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02177857793867588, + "timestamp": "2025-10-01 03:28:12.922655", + "step": 5845, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.954985", + "step": 5845, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016460184007883072, + "timestamp": "2025-10-01 03:28:12.957220", + "step": 5846, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:12.988694", + "step": 5846, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040070529212243855, + "timestamp": "2025-10-01 03:28:12.990980", + "step": 5847, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.022983", + "step": 5847, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05615383759140968, + "timestamp": "2025-10-01 03:28:13.046762", + "step": 5848, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.079233", + "step": 5848, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015838093822821975, + "timestamp": "2025-10-01 03:28:13.082566", + "step": 5849, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.128160", + "step": 5849, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034829540527425706, + "timestamp": "2025-10-01 03:28:13.130286", + "step": 5850, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:13.160956", + "step": 5850, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009042136371135712, + "timestamp": "2025-10-01 03:28:13.163223", + "step": 5851, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.193514", + "step": 5851, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043583993101492524, + "timestamp": "2025-10-01 03:28:13.217125", + "step": 5852, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.247428", + "step": 5852, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003255477931816131, + "timestamp": "2025-10-01 03:28:13.249796", + "step": 5853, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.281375", + "step": 5853, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016812592511996627, + "timestamp": "2025-10-01 03:28:13.284691", + "step": 5854, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.328365", + "step": 5854, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028044768259860575, + "timestamp": "2025-10-01 03:28:13.330558", + "step": 5855, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.362673", + "step": 5855, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001825203886255622, + "timestamp": "2025-10-01 03:28:13.386144", + "step": 5856, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.418526", + "step": 5856, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001949899597093463, + "timestamp": "2025-10-01 03:28:13.420468", + "step": 5857, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:13.452311", + "step": 5857, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006072259857319295, + "timestamp": "2025-10-01 03:28:13.454395", + "step": 5858, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.485628", + "step": 5858, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004814528801944107, + "timestamp": "2025-10-01 03:28:13.488818", + "step": 5859, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.520504", + "step": 5859, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024156458675861359, + "timestamp": "2025-10-01 03:28:13.544394", + "step": 5860, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.586805", + "step": 5860, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014635698171332479, + "timestamp": "2025-10-01 03:28:13.589994", + "step": 5861, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.624686", + "step": 5861, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03614004701375961, + "timestamp": "2025-10-01 03:28:13.626884", + "step": 5862, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:13.658969", + "step": 5862, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001905947719933465, + "timestamp": "2025-10-01 03:28:13.661046", + "step": 5863, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:13.692198", + "step": 5863, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005014174967072904, + "timestamp": "2025-10-01 03:28:13.716187", + "step": 5864, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.746992", + "step": 5864, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016218468081206083, + "timestamp": "2025-10-01 03:28:13.751995", + "step": 5865, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.792353", + "step": 5865, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013992937747389078, + "timestamp": "2025-10-01 03:28:13.794649", + "step": 5866, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.826439", + "step": 5866, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018402732675895095, + "timestamp": "2025-10-01 03:28:13.828626", + "step": 5867, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:13.859600", + "step": 5867, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002998779236804694, + "timestamp": "2025-10-01 03:28:13.883214", + "step": 5868, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.914523", + "step": 5868, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012164742220193148, + "timestamp": "2025-10-01 03:28:13.916804", + "step": 5869, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.948225", + "step": 5869, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012198310578241944, + "timestamp": "2025-10-01 03:28:13.950723", + "step": 5870, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:13.983190", + "step": 5870, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005721209105104208, + "timestamp": "2025-10-01 03:28:13.985196", + "step": 5871, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:14.018602", + "step": 5871, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00686821062117815, + "timestamp": "2025-10-01 03:28:14.042393", + "step": 5872, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.073409", + "step": 5872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025452040135860443, + "timestamp": "2025-10-01 03:28:14.075546", + "step": 5873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.108990", + "step": 5873, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008235541172325611, + "timestamp": "2025-10-01 03:28:14.111255", + "step": 5874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.143832", + "step": 5874, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010370054515078664, + "timestamp": "2025-10-01 03:28:14.145990", + "step": 5875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.176638", + "step": 5875, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015960786258801818, + "timestamp": "2025-10-01 03:28:14.200474", + "step": 5876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.231868", + "step": 5876, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044372299453243613, + "timestamp": "2025-10-01 03:28:14.234038", + "step": 5877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.264815", + "step": 5877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035671864170581102, + "timestamp": "2025-10-01 03:28:14.268004", + "step": 5878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.301371", + "step": 5878, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000638589437585324, + "timestamp": "2025-10-01 03:28:14.303545", + "step": 5879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.334856", + "step": 5879, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027443147264420986, + "timestamp": "2025-10-01 03:28:14.358469", + "step": 5880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.389988", + "step": 5880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007317045237869024, + "timestamp": "2025-10-01 03:28:14.391950", + "step": 5881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.422839", + "step": 5881, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033616815926507115, + "timestamp": "2025-10-01 03:28:14.425307", + "step": 5882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.456116", + "step": 5882, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017987636383622885, + "timestamp": "2025-10-01 03:28:14.458326", + "step": 5883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.503615", + "step": 5883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001572321867570281, + "timestamp": "2025-10-01 03:28:14.527730", + "step": 5884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.560423", + "step": 5884, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008109565242193639, + "timestamp": "2025-10-01 03:28:14.562615", + "step": 5885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:14.593269", + "step": 5885, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023349204275291413, + "timestamp": "2025-10-01 03:28:14.595402", + "step": 5886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.626332", + "step": 5886, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044074791367165744, + "timestamp": "2025-10-01 03:28:14.628290", + "step": 5887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.662189", + "step": 5887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002293781581101939, + "timestamp": "2025-10-01 03:28:14.685872", + "step": 5888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:14.717326", + "step": 5888, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05191332474350929, + "timestamp": "2025-10-01 03:28:14.719380", + "step": 5889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.750012", + "step": 5889, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004997951327823102, + "timestamp": "2025-10-01 03:28:14.751930", + "step": 5890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.783061", + "step": 5890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008429765002802014, + "timestamp": "2025-10-01 03:28:14.785353", + "step": 5891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:14.816680", + "step": 5891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004765797930303961, + "timestamp": "2025-10-01 03:28:14.840073", + "step": 5892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.872067", + "step": 5892, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037751358468085527, + "timestamp": "2025-10-01 03:28:14.894875", + "step": 5893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.937624", + "step": 5893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021638731413986534, + "timestamp": "2025-10-01 03:28:14.940002", + "step": 5894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:14.981139", + "step": 5894, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015850309282541275, + "timestamp": "2025-10-01 03:28:14.988190", + "step": 5895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.029182", + "step": 5895, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007004160434007645, + "timestamp": "2025-10-01 03:28:15.053698", + "step": 5896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.096842", + "step": 5896, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000779345165938139, + "timestamp": "2025-10-01 03:28:15.100048", + "step": 5897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.131685", + "step": 5897, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04642093554139137, + "timestamp": "2025-10-01 03:28:15.139504", + "step": 5898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:15.173290", + "step": 5898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013481283094733953, + "timestamp": "2025-10-01 03:28:15.175742", + "step": 5899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.206905", + "step": 5899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012568170204758644, + "timestamp": "2025-10-01 03:28:15.231054", + "step": 5900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.264595", + "step": 5900, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016170262824743986, + "timestamp": "2025-10-01 03:28:15.266917", + "step": 5901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.298028", + "step": 5901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002848710282705724, + "timestamp": "2025-10-01 03:28:15.300173", + "step": 5902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.330702", + "step": 5902, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008111443021334708, + "timestamp": "2025-10-01 03:28:15.336890", + "step": 5903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.367941", + "step": 5903, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014980351261328906, + "timestamp": "2025-10-01 03:28:15.391371", + "step": 5904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:15.422744", + "step": 5904, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005798633792437613, + "timestamp": "2025-10-01 03:28:15.424941", + "step": 5905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.455804", + "step": 5905, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047697810805402696, + "timestamp": "2025-10-01 03:28:15.457997", + "step": 5906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.489609", + "step": 5906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008768097497522831, + "timestamp": "2025-10-01 03:28:15.491678", + "step": 5907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.522274", + "step": 5907, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020582028664648533, + "timestamp": "2025-10-01 03:28:15.546009", + "step": 5908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.578766", + "step": 5908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037276200600899756, + "timestamp": "2025-10-01 03:28:15.580806", + "step": 5909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:15.611777", + "step": 5909, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013197178486734629, + "timestamp": "2025-10-01 03:28:15.615360", + "step": 5910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.646414", + "step": 5910, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009928205981850624, + "timestamp": "2025-10-01 03:28:15.648520", + "step": 5911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.678843", + "step": 5911, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009371941909193993, + "timestamp": "2025-10-01 03:28:15.703717", + "step": 5912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.734511", + "step": 5912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005372516345232725, + "timestamp": "2025-10-01 03:28:15.736763", + "step": 5913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:15.767730", + "step": 5913, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023979421239346266, + "timestamp": "2025-10-01 03:28:15.769913", + "step": 5914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.801087", + "step": 5914, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05316244810819626, + "timestamp": "2025-10-01 03:28:15.803112", + "step": 5915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.834233", + "step": 5915, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011013693176209927, + "timestamp": "2025-10-01 03:28:15.857954", + "step": 5916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.889579", + "step": 5916, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011494151112856343, + "timestamp": "2025-10-01 03:28:15.891873", + "step": 5917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.924123", + "step": 5917, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000311236857669428, + "timestamp": "2025-10-01 03:28:15.926588", + "step": 5918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:15.960654", + "step": 5918, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006823836360126734, + "timestamp": "2025-10-01 03:28:15.967646", + "step": 5919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:16.003813", + "step": 5919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014803380472585559, + "timestamp": "2025-10-01 03:28:16.030264", + "step": 5920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:16.061798", + "step": 5920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038438051706179976, + "timestamp": "2025-10-01 03:28:16.065516", + "step": 5921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:16.096517", + "step": 5921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009248864953406155, + "timestamp": "2025-10-01 03:28:16.100691", + "step": 5922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:16.132638", + "step": 5922, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009092492400668561, + "timestamp": "2025-10-01 03:28:16.135952", + "step": 5923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:16.167672", + "step": 5923, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.306223415071145e-05, + "timestamp": "2025-10-01 03:28:16.193885", + "step": 5924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:16.226732", + "step": 5924, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005945895682089031, + "timestamp": "2025-10-01 03:28:16.240708", + "step": 5925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:16.275244", + "step": 5925, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011985169112449512, + "timestamp": "2025-10-01 03:28:16.280541", + "step": 5926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:16.316485", + "step": 5926, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01137503795325756, + "timestamp": "2025-10-01 03:28:16.330872", + "step": 5927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:16.370466", + "step": 5927, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004855907172895968, + "timestamp": "2025-10-01 03:28:16.399392", + "step": 5928, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:28:19.119639", + "step": 5928, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2308087.0122670927, + "timestamp": "2025-10-01 03:28:19.121911", + "step": 5928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.151191", + "step": 5928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028393458342179656, + "timestamp": "2025-10-01 03:28:19.153856", + "step": 5929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.192165", + "step": 5929, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016721566498745233, + "timestamp": "2025-10-01 03:28:19.196893", + "step": 5930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.228783", + "step": 5930, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022274241200648248, + "timestamp": "2025-10-01 03:28:19.230915", + "step": 5931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.261455", + "step": 5931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04157964512705803, + "timestamp": "2025-10-01 03:28:19.286285", + "step": 5932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.321428", + "step": 5932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003736482933163643, + "timestamp": "2025-10-01 03:28:19.323649", + "step": 5933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.355094", + "step": 5933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014942003414034843, + "timestamp": "2025-10-01 03:28:19.357574", + "step": 5934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.388936", + "step": 5934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00846152938902378, + "timestamp": "2025-10-01 03:28:19.391349", + "step": 5935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.424168", + "step": 5935, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002616280107758939, + "timestamp": "2025-10-01 03:28:19.448116", + "step": 5936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.478619", + "step": 5936, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.050390031188726425, + "timestamp": "2025-10-01 03:28:19.480985", + "step": 5937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.512548", + "step": 5937, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008060670457780361, + "timestamp": "2025-10-01 03:28:19.518869", + "step": 5938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.551725", + "step": 5938, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011650604428723454, + "timestamp": "2025-10-01 03:28:19.554466", + "step": 5939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.584910", + "step": 5939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030865336884744465, + "timestamp": "2025-10-01 03:28:19.609765", + "step": 5940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.641156", + "step": 5940, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010821815521921962, + "timestamp": "2025-10-01 03:28:19.643302", + "step": 5941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.673527", + "step": 5941, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014477765944320709, + "timestamp": "2025-10-01 03:28:19.675582", + "step": 5942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:19.705957", + "step": 5942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010572867467999458, + "timestamp": "2025-10-01 03:28:19.708289", + "step": 5943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.744671", + "step": 5943, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001325259217992425, + "timestamp": "2025-10-01 03:28:19.768372", + "step": 5944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.799656", + "step": 5944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018437446851748973, + "timestamp": "2025-10-01 03:28:19.801876", + "step": 5945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.836045", + "step": 5945, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035587865859270096, + "timestamp": "2025-10-01 03:28:19.838247", + "step": 5946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.869158", + "step": 5946, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006744289421476424, + "timestamp": "2025-10-01 03:28:19.872656", + "step": 5947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.903664", + "step": 5947, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02526232786476612, + "timestamp": "2025-10-01 03:28:19.927813", + "step": 5948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.958824", + "step": 5948, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.914817928802222e-05, + "timestamp": "2025-10-01 03:28:19.960745", + "step": 5949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:19.991202", + "step": 5949, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004766111262142658, + "timestamp": "2025-10-01 03:28:19.999423", + "step": 5950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:20.031162", + "step": 5950, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014128695474937558, + "timestamp": "2025-10-01 03:28:20.033896", + "step": 5951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.065364", + "step": 5951, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015201757661998272, + "timestamp": "2025-10-01 03:28:20.090191", + "step": 5952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.124205", + "step": 5952, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020354434673208743, + "timestamp": "2025-10-01 03:28:20.128693", + "step": 5953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.160532", + "step": 5953, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001884177909232676, + "timestamp": "2025-10-01 03:28:20.163036", + "step": 5954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.193781", + "step": 5954, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004954843316227198, + "timestamp": "2025-10-01 03:28:20.196031", + "step": 5955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.226200", + "step": 5955, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013147053541615605, + "timestamp": "2025-10-01 03:28:20.250778", + "step": 5956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.286321", + "step": 5956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003829069319181144, + "timestamp": "2025-10-01 03:28:20.292642", + "step": 5957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:20.326241", + "step": 5957, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031610876321792603, + "timestamp": "2025-10-01 03:28:20.328633", + "step": 5958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.358989", + "step": 5958, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003231967275496572, + "timestamp": "2025-10-01 03:28:20.361220", + "step": 5959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.391704", + "step": 5959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047221913700923324, + "timestamp": "2025-10-01 03:28:20.415334", + "step": 5960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.450302", + "step": 5960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001430262578651309, + "timestamp": "2025-10-01 03:28:20.452648", + "step": 5961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.484030", + "step": 5961, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005008888547308743, + "timestamp": "2025-10-01 03:28:20.486158", + "step": 5962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:20.516937", + "step": 5962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013582519022747874, + "timestamp": "2025-10-01 03:28:20.519343", + "step": 5963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.550124", + "step": 5963, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010853508720174432, + "timestamp": "2025-10-01 03:28:20.574021", + "step": 5964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.604441", + "step": 5964, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012141991464886814, + "timestamp": "2025-10-01 03:28:20.607086", + "step": 5965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.639145", + "step": 5965, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002592235687188804, + "timestamp": "2025-10-01 03:28:20.645507", + "step": 5966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.680407", + "step": 5966, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003071758896112442, + "timestamp": "2025-10-01 03:28:20.683214", + "step": 5967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.717173", + "step": 5967, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001535826246254146, + "timestamp": "2025-10-01 03:28:20.740906", + "step": 5968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.772027", + "step": 5968, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002586044429335743, + "timestamp": "2025-10-01 03:28:20.774195", + "step": 5969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:20.804551", + "step": 5969, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000596067460719496, + "timestamp": "2025-10-01 03:28:20.806748", + "step": 5970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.837354", + "step": 5970, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005486424779519439, + "timestamp": "2025-10-01 03:28:20.839657", + "step": 5971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.869891", + "step": 5971, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010648434050381184, + "timestamp": "2025-10-01 03:28:20.894038", + "step": 5972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.924596", + "step": 5972, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004088799003511667, + "timestamp": "2025-10-01 03:28:20.926866", + "step": 5973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.957222", + "step": 5973, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001479812548495829, + "timestamp": "2025-10-01 03:28:20.959926", + "step": 5974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:20.990948", + "step": 5974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001476272736908868, + "timestamp": "2025-10-01 03:28:20.993272", + "step": 5975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:21.024789", + "step": 5975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012288793222978711, + "timestamp": "2025-10-01 03:28:21.048495", + "step": 5976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:21.087263", + "step": 5976, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.376230107387528e-05, + "timestamp": "2025-10-01 03:28:21.089636", + "step": 5977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:21.119678", + "step": 5977, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006146617233753204, + "timestamp": "2025-10-01 03:28:21.121859", + "step": 5978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.152591", + "step": 5978, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012854924716521055, + "timestamp": "2025-10-01 03:28:21.155239", + "step": 5979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:21.186544", + "step": 5979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011727002129191533, + "timestamp": "2025-10-01 03:28:21.211387", + "step": 5980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.241826", + "step": 5980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000395993614802137, + "timestamp": "2025-10-01 03:28:21.244090", + "step": 5981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.274319", + "step": 5981, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033671289565972984, + "timestamp": "2025-10-01 03:28:21.277002", + "step": 5982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.307470", + "step": 5982, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001409641554346308, + "timestamp": "2025-10-01 03:28:21.309789", + "step": 5983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.340348", + "step": 5983, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002038240636466071, + "timestamp": "2025-10-01 03:28:21.364400", + "step": 5984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.397177", + "step": 5984, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006970302667468786, + "timestamp": "2025-10-01 03:28:21.399391", + "step": 5985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.430353", + "step": 5985, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.134543168125674e-05, + "timestamp": "2025-10-01 03:28:21.432417", + "step": 5986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.463084", + "step": 5986, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003946029464714229, + "timestamp": "2025-10-01 03:28:21.465530", + "step": 5987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.496774", + "step": 5987, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003395431849639863, + "timestamp": "2025-10-01 03:28:21.520551", + "step": 5988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.550957", + "step": 5988, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028884501080028713, + "timestamp": "2025-10-01 03:28:21.553127", + "step": 5989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.585332", + "step": 5989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022895428992342204, + "timestamp": "2025-10-01 03:28:21.587498", + "step": 5990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:21.618208", + "step": 5990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001556995091959834, + "timestamp": "2025-10-01 03:28:21.620835", + "step": 5991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:21.651526", + "step": 5991, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008088858448900282, + "timestamp": "2025-10-01 03:28:21.682393", + "step": 5992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:21.729044", + "step": 5992, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014992749493103474, + "timestamp": "2025-10-01 03:28:21.746541", + "step": 5993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.792612", + "step": 5993, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015086740022525191, + "timestamp": "2025-10-01 03:28:21.810662", + "step": 5994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.853670", + "step": 5994, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038263629539869726, + "timestamp": "2025-10-01 03:28:21.872291", + "step": 5995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:21.919169", + "step": 5995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006015859544277191, + "timestamp": "2025-10-01 03:28:21.964271", + "step": 5996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:22.014167", + "step": 5996, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008230433450080454, + "timestamp": "2025-10-01 03:28:22.035784", + "step": 5997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:22.085422", + "step": 5997, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001549851440358907, + "timestamp": "2025-10-01 03:28:22.108757", + "step": 5998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:22.159453", + "step": 5998, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.61660043685697e-05, + "timestamp": "2025-10-01 03:28:22.176253", + "step": 5999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:22.214610", + "step": 5999, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024105612828861922, + "timestamp": "2025-10-01 03:28:22.250516", + "step": 6000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 6000", + "timestamp": "2025-10-01 03:28:27.320945", + "step": 6000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.368155", + "step": 6000, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007731792866252363, + "timestamp": "2025-10-01 03:28:27.378965", + "step": 6001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:27.440271", + "step": 6001, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034970376873388886, + "timestamp": "2025-10-01 03:28:27.452531", + "step": 6002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.498462", + "step": 6002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024300756922457367, + "timestamp": "2025-10-01 03:28:27.511872", + "step": 6003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.559208", + "step": 6003, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010775531700346619, + "timestamp": "2025-10-01 03:28:27.586172", + "step": 6004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.624078", + "step": 6004, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002608427021186799, + "timestamp": "2025-10-01 03:28:27.631474", + "step": 6005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.677573", + "step": 6005, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016549191786907613, + "timestamp": "2025-10-01 03:28:27.683572", + "step": 6006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.743552", + "step": 6006, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017052878392860293, + "timestamp": "2025-10-01 03:28:27.748107", + "step": 6007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.786162", + "step": 6007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034533420694060624, + "timestamp": "2025-10-01 03:28:27.813075", + "step": 6008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:27.851372", + "step": 6008, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.404198499396443e-05, + "timestamp": "2025-10-01 03:28:27.860924", + "step": 6009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.907551", + "step": 6009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024012282665353268, + "timestamp": "2025-10-01 03:28:27.915625", + "step": 6010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:27.955385", + "step": 6010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021766369172837585, + "timestamp": "2025-10-01 03:28:27.961859", + "step": 6011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.000262", + "step": 6011, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.039370469748973846, + "timestamp": "2025-10-01 03:28:28.029548", + "step": 6012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.074274", + "step": 6012, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010410994291305542, + "timestamp": "2025-10-01 03:28:28.082574", + "step": 6013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.132226", + "step": 6013, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004964878316968679, + "timestamp": "2025-10-01 03:28:28.143418", + "step": 6014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.197086", + "step": 6014, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006645164103247225, + "timestamp": "2025-10-01 03:28:28.208557", + "step": 6015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.266603", + "step": 6015, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038608157774433494, + "timestamp": "2025-10-01 03:28:28.295768", + "step": 6016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.342043", + "step": 6016, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024095263506751508, + "timestamp": "2025-10-01 03:28:28.348283", + "step": 6017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.384717", + "step": 6017, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021544378250837326, + "timestamp": "2025-10-01 03:28:28.395532", + "step": 6018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.443246", + "step": 6018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007568576373159885, + "timestamp": "2025-10-01 03:28:28.449564", + "step": 6019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.487052", + "step": 6019, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001909963320940733, + "timestamp": "2025-10-01 03:28:28.516208", + "step": 6020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.560376", + "step": 6020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002751511929091066, + "timestamp": "2025-10-01 03:28:28.566323", + "step": 6021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.603132", + "step": 6021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003308785962872207, + "timestamp": "2025-10-01 03:28:28.613151", + "step": 6022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.650723", + "step": 6022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044533840264193714, + "timestamp": "2025-10-01 03:28:28.653888", + "step": 6023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.705223", + "step": 6023, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016557889059185982, + "timestamp": "2025-10-01 03:28:28.729689", + "step": 6024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.782428", + "step": 6024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000617275363765657, + "timestamp": "2025-10-01 03:28:28.785284", + "step": 6025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.841316", + "step": 6025, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017649575602263212, + "timestamp": "2025-10-01 03:28:28.850999", + "step": 6026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.901949", + "step": 6026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005479486426338553, + "timestamp": "2025-10-01 03:28:28.904771", + "step": 6027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:28.946291", + "step": 6027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002910241310019046, + "timestamp": "2025-10-01 03:28:28.970625", + "step": 6028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.031951", + "step": 6028, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.223481225082651e-05, + "timestamp": "2025-10-01 03:28:29.036884", + "step": 6029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.095639", + "step": 6029, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03533555939793587, + "timestamp": "2025-10-01 03:28:29.099329", + "step": 6030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:29.155301", + "step": 6030, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00134129892103374, + "timestamp": "2025-10-01 03:28:29.171058", + "step": 6031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:29.236872", + "step": 6031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020832066365983337, + "timestamp": "2025-10-01 03:28:29.266111", + "step": 6032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.309435", + "step": 6032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.052759405225515366, + "timestamp": "2025-10-01 03:28:29.317584", + "step": 6033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.360935", + "step": 6033, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027723543462343514, + "timestamp": "2025-10-01 03:28:29.370142", + "step": 6034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.427686", + "step": 6034, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041170729673467577, + "timestamp": "2025-10-01 03:28:29.441671", + "step": 6035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.513578", + "step": 6035, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003156572056468576, + "timestamp": "2025-10-01 03:28:29.543908", + "step": 6036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:29.597965", + "step": 6036, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000340759550454095, + "timestamp": "2025-10-01 03:28:29.604930", + "step": 6037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.674193", + "step": 6037, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001270703796762973, + "timestamp": "2025-10-01 03:28:29.684639", + "step": 6038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:29.752926", + "step": 6038, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.025731071829795837, + "timestamp": "2025-10-01 03:28:29.757926", + "step": 6039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.809976", + "step": 6039, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002325580921024084, + "timestamp": "2025-10-01 03:28:29.836866", + "step": 6040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:29.884467", + "step": 6040, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003010534856002778, + "timestamp": "2025-10-01 03:28:29.895074", + "step": 6041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:29.952499", + "step": 6041, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002203976473538205, + "timestamp": "2025-10-01 03:28:29.958767", + "step": 6042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.034194", + "step": 6042, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007054079324007034, + "timestamp": "2025-10-01 03:28:30.039420", + "step": 6043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.086519", + "step": 6043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004405816085636616, + "timestamp": "2025-10-01 03:28:30.113931", + "step": 6044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.161688", + "step": 6044, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015991121472325176, + "timestamp": "2025-10-01 03:28:30.166754", + "step": 6045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.245915", + "step": 6045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002625886700116098, + "timestamp": "2025-10-01 03:28:30.255956", + "step": 6046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.313922", + "step": 6046, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002584913745522499, + "timestamp": "2025-10-01 03:28:30.320955", + "step": 6047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.417165", + "step": 6047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019529988639988005, + "timestamp": "2025-10-01 03:28:30.449918", + "step": 6048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.542917", + "step": 6048, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023418960627168417, + "timestamp": "2025-10-01 03:28:30.549539", + "step": 6049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:30.620832", + "step": 6049, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004822514019906521, + "timestamp": "2025-10-01 03:28:30.629209", + "step": 6050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:28:30.685880", + "step": 6050, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001262156874872744, + "timestamp": "2025-10-01 03:28:30.693326", + "step": 6051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.753233", + "step": 6051, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043263420229777694, + "timestamp": "2025-10-01 03:28:30.784514", + "step": 6052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.855655", + "step": 6052, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001966436830116436, + "timestamp": "2025-10-01 03:28:30.868523", + "step": 6053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:30.926580", + "step": 6053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009366596932522953, + "timestamp": "2025-10-01 03:28:30.935256", + "step": 6054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.004092", + "step": 6054, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013539184583351016, + "timestamp": "2025-10-01 03:28:31.010991", + "step": 6055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.069336", + "step": 6055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05507874861359596, + "timestamp": "2025-10-01 03:28:31.100619", + "step": 6056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.149895", + "step": 6056, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0484459213912487, + "timestamp": "2025-10-01 03:28:31.153453", + "step": 6057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.216521", + "step": 6057, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038765676436014473, + "timestamp": "2025-10-01 03:28:31.219536", + "step": 6058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.275325", + "step": 6058, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018695733160711825, + "timestamp": "2025-10-01 03:28:31.278508", + "step": 6059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.329133", + "step": 6059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002165666315704584, + "timestamp": "2025-10-01 03:28:31.353928", + "step": 6060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.432233", + "step": 6060, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010307707823812962, + "timestamp": "2025-10-01 03:28:31.434550", + "step": 6061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.468683", + "step": 6061, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001728735660435632, + "timestamp": "2025-10-01 03:28:31.471227", + "step": 6062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.524099", + "step": 6062, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003122317139059305, + "timestamp": "2025-10-01 03:28:31.527416", + "step": 6063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.568137", + "step": 6063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003285377169959247, + "timestamp": "2025-10-01 03:28:31.595274", + "step": 6064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.639509", + "step": 6064, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030699731782078743, + "timestamp": "2025-10-01 03:28:31.645979", + "step": 6065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:31.696852", + "step": 6065, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037051946856081486, + "timestamp": "2025-10-01 03:28:31.713251", + "step": 6066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:31.754839", + "step": 6066, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013397012662608176, + "timestamp": "2025-10-01 03:28:31.758884", + "step": 6067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.819814", + "step": 6067, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03649317845702171, + "timestamp": "2025-10-01 03:28:31.851922", + "step": 6068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:31.889819", + "step": 6068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001743529923260212, + "timestamp": "2025-10-01 03:28:31.900974", + "step": 6069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:31.942477", + "step": 6069, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005487239453941584, + "timestamp": "2025-10-01 03:28:31.946910", + "step": 6070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.020559", + "step": 6070, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020031232270412147, + "timestamp": "2025-10-01 03:28:32.030469", + "step": 6071, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.067804", + "step": 6071, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005483613349497318, + "timestamp": "2025-10-01 03:28:32.110346", + "step": 6072, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.150866", + "step": 6072, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010789139196276665, + "timestamp": "2025-10-01 03:28:32.158379", + "step": 6073, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.202141", + "step": 6073, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016240259865298867, + "timestamp": "2025-10-01 03:28:32.208276", + "step": 6074, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.257157", + "step": 6074, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014828704297542572, + "timestamp": "2025-10-01 03:28:32.272064", + "step": 6075, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.319074", + "step": 6075, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023057644721120596, + "timestamp": "2025-10-01 03:28:32.351811", + "step": 6076, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.392389", + "step": 6076, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0055144536308944225, + "timestamp": "2025-10-01 03:28:32.396906", + "step": 6077, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.459784", + "step": 6077, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005029925378039479, + "timestamp": "2025-10-01 03:28:32.465295", + "step": 6078, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:32.515197", + "step": 6078, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021142931655049324, + "timestamp": "2025-10-01 03:28:32.522628", + "step": 6079, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:32.563031", + "step": 6079, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000333773234160617, + "timestamp": "2025-10-01 03:28:32.594380", + "step": 6080, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:28:35.439511", + "step": 6080, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2356856.1169318394, + "timestamp": "2025-10-01 03:28:35.450749", + "step": 6080, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.492790", + "step": 6080, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019735447131097317, + "timestamp": "2025-10-01 03:28:35.503758", + "step": 6081, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.543599", + "step": 6081, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018354002386331558, + "timestamp": "2025-10-01 03:28:35.553845", + "step": 6082, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.592115", + "step": 6082, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008919007959775627, + "timestamp": "2025-10-01 03:28:35.602205", + "step": 6083, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.641337", + "step": 6083, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004092539194971323, + "timestamp": "2025-10-01 03:28:35.673301", + "step": 6084, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.713595", + "step": 6084, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006555365980602801, + "timestamp": "2025-10-01 03:28:35.723362", + "step": 6085, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.766510", + "step": 6085, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003269672393798828, + "timestamp": "2025-10-01 03:28:35.775778", + "step": 6086, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.815671", + "step": 6086, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000811325735412538, + "timestamp": "2025-10-01 03:28:35.824881", + "step": 6087, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.864793", + "step": 6087, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014374430757015944, + "timestamp": "2025-10-01 03:28:35.897154", + "step": 6088, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.938906", + "step": 6088, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012300249363761395, + "timestamp": "2025-10-01 03:28:35.949819", + "step": 6089, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:35.995048", + "step": 6089, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.028352661058306694, + "timestamp": "2025-10-01 03:28:36.004226", + "step": 6090, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:36.046692", + "step": 6090, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024591798428446054, + "timestamp": "2025-10-01 03:28:36.057094", + "step": 6091, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.097171", + "step": 6091, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047593103954568505, + "timestamp": "2025-10-01 03:28:36.128070", + "step": 6092, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.167502", + "step": 6092, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008281086338683963, + "timestamp": "2025-10-01 03:28:36.178909", + "step": 6093, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.220889", + "step": 6093, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016354332910850644, + "timestamp": "2025-10-01 03:28:36.231093", + "step": 6094, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.273206", + "step": 6094, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007046287064440548, + "timestamp": "2025-10-01 03:28:36.286821", + "step": 6095, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:36.336508", + "step": 6095, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005115997628308833, + "timestamp": "2025-10-01 03:28:36.369539", + "step": 6096, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.414885", + "step": 6096, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002696137526072562, + "timestamp": "2025-10-01 03:28:36.424356", + "step": 6097, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.466433", + "step": 6097, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014320051996037364, + "timestamp": "2025-10-01 03:28:36.481439", + "step": 6098, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.523528", + "step": 6098, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029574596555903554, + "timestamp": "2025-10-01 03:28:36.536680", + "step": 6099, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.581044", + "step": 6099, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01987677998840809, + "timestamp": "2025-10-01 03:28:36.615798", + "step": 6100, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.659061", + "step": 6100, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002022254979237914, + "timestamp": "2025-10-01 03:28:36.669338", + "step": 6101, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.712447", + "step": 6101, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005752549506723881, + "timestamp": "2025-10-01 03:28:36.722385", + "step": 6102, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.761283", + "step": 6102, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004535601940006018, + "timestamp": "2025-10-01 03:28:36.771216", + "step": 6103, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.814136", + "step": 6103, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03483063727617264, + "timestamp": "2025-10-01 03:28:36.846003", + "step": 6104, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:36.890570", + "step": 6104, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02787584252655506, + "timestamp": "2025-10-01 03:28:36.903072", + "step": 6105, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:36.943713", + "step": 6105, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041438385960645974, + "timestamp": "2025-10-01 03:28:36.955425", + "step": 6106, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.000506", + "step": 6106, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025858115986920893, + "timestamp": "2025-10-01 03:28:37.011475", + "step": 6107, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.059387", + "step": 6107, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002626058121677488, + "timestamp": "2025-10-01 03:28:37.095331", + "step": 6108, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:37.148039", + "step": 6108, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003082612529397011, + "timestamp": "2025-10-01 03:28:37.167201", + "step": 6109, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.214448", + "step": 6109, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010363038163632154, + "timestamp": "2025-10-01 03:28:37.230546", + "step": 6110, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:37.278531", + "step": 6110, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017681096214801073, + "timestamp": "2025-10-01 03:28:37.291365", + "step": 6111, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.332571", + "step": 6111, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000271603959845379, + "timestamp": "2025-10-01 03:28:37.368542", + "step": 6112, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.411242", + "step": 6112, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042173851397819817, + "timestamp": "2025-10-01 03:28:37.426324", + "step": 6113, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.476835", + "step": 6113, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.043716151267290115, + "timestamp": "2025-10-01 03:28:37.487532", + "step": 6114, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.549451", + "step": 6114, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011229760712012649, + "timestamp": "2025-10-01 03:28:37.561503", + "step": 6115, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.602597", + "step": 6115, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004019697662442923, + "timestamp": "2025-10-01 03:28:37.634933", + "step": 6116, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.674422", + "step": 6116, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031079017207957804, + "timestamp": "2025-10-01 03:28:37.685865", + "step": 6117, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.726957", + "step": 6117, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02134338580071926, + "timestamp": "2025-10-01 03:28:37.736891", + "step": 6118, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:28:37.778972", + "step": 6118, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026405658572912216, + "timestamp": "2025-10-01 03:28:37.791147", + "step": 6119, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:37.835051", + "step": 6119, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045275091542862356, + "timestamp": "2025-10-01 03:28:37.867358", + "step": 6120, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:37.908466", + "step": 6120, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004811377148143947, + "timestamp": "2025-10-01 03:28:37.921380", + "step": 6121, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:37.963411", + "step": 6121, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003904821933247149, + "timestamp": "2025-10-01 03:28:37.976938", + "step": 6122, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.027283", + "step": 6122, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003528189263306558, + "timestamp": "2025-10-01 03:28:38.037874", + "step": 6123, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.080351", + "step": 6123, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011630311608314514, + "timestamp": "2025-10-01 03:28:38.111278", + "step": 6124, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.150377", + "step": 6124, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00793970562517643, + "timestamp": "2025-10-01 03:28:38.162438", + "step": 6125, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.204767", + "step": 6125, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017676298739388585, + "timestamp": "2025-10-01 03:28:38.214447", + "step": 6126, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.255112", + "step": 6126, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007889618282206357, + "timestamp": "2025-10-01 03:28:38.267968", + "step": 6127, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.310637", + "step": 6127, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011547950096428394, + "timestamp": "2025-10-01 03:28:38.347112", + "step": 6128, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.391333", + "step": 6128, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012676677433773875, + "timestamp": "2025-10-01 03:28:38.395081", + "step": 6129, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.439515", + "step": 6129, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010009672259911895, + "timestamp": "2025-10-01 03:28:38.454907", + "step": 6130, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.503329", + "step": 6130, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007055386668071151, + "timestamp": "2025-10-01 03:28:38.519847", + "step": 6131, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.563683", + "step": 6131, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018634216394275427, + "timestamp": "2025-10-01 03:28:38.596598", + "step": 6132, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.636837", + "step": 6132, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020257265714462847, + "timestamp": "2025-10-01 03:28:38.640748", + "step": 6133, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:38.680876", + "step": 6133, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004371062677819282, + "timestamp": "2025-10-01 03:28:38.692441", + "step": 6134, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.732756", + "step": 6134, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005573660018853843, + "timestamp": "2025-10-01 03:28:38.743647", + "step": 6135, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.783531", + "step": 6135, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001089296187274158, + "timestamp": "2025-10-01 03:28:38.817154", + "step": 6136, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.858977", + "step": 6136, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.029729362577199936, + "timestamp": "2025-10-01 03:28:38.869802", + "step": 6137, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:38.912576", + "step": 6137, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05877429619431496, + "timestamp": "2025-10-01 03:28:38.922368", + "step": 6138, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:38.966147", + "step": 6138, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007474120357073843, + "timestamp": "2025-10-01 03:28:38.976024", + "step": 6139, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.017030", + "step": 6139, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041548232547938824, + "timestamp": "2025-10-01 03:28:39.049397", + "step": 6140, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.089868", + "step": 6140, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005826060194522142, + "timestamp": "2025-10-01 03:28:39.101523", + "step": 6141, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.141914", + "step": 6141, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013980504591017962, + "timestamp": "2025-10-01 03:28:39.153134", + "step": 6142, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.196842", + "step": 6142, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001685896422713995, + "timestamp": "2025-10-01 03:28:39.210353", + "step": 6143, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.252035", + "step": 6143, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015832276549190283, + "timestamp": "2025-10-01 03:28:39.284333", + "step": 6144, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.324672", + "step": 6144, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001527388347312808, + "timestamp": "2025-10-01 03:28:39.333929", + "step": 6145, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.375109", + "step": 6145, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005933981738053262, + "timestamp": "2025-10-01 03:28:39.385367", + "step": 6146, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:39.425046", + "step": 6146, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001183074084110558, + "timestamp": "2025-10-01 03:28:39.435432", + "step": 6147, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.484875", + "step": 6147, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005740051623433828, + "timestamp": "2025-10-01 03:28:39.515838", + "step": 6148, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.554623", + "step": 6148, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022213261108845472, + "timestamp": "2025-10-01 03:28:39.565815", + "step": 6149, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.603893", + "step": 6149, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011214909609407187, + "timestamp": "2025-10-01 03:28:39.613673", + "step": 6150, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.653296", + "step": 6150, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003210000286344439, + "timestamp": "2025-10-01 03:28:39.664040", + "step": 6151, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.703448", + "step": 6151, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004417891032062471, + "timestamp": "2025-10-01 03:28:39.735859", + "step": 6152, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:28:39.776038", + "step": 6152, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001535395858809352, + "timestamp": "2025-10-01 03:28:39.786578", + "step": 6153, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.820510", + "step": 6153, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005670074024237692, + "timestamp": "2025-10-01 03:28:39.828552", + "step": 6154, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:39.871279", + "step": 6154, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009638863615691662, + "timestamp": "2025-10-01 03:28:39.879639", + "step": 6155, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:39.925552", + "step": 6155, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010170520981773734, + "timestamp": "2025-10-01 03:28:39.956602", + "step": 6156, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:39.999316", + "step": 6156, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017022847896441817, + "timestamp": "2025-10-01 03:28:40.012639", + "step": 6157, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.056905", + "step": 6157, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004801151342689991, + "timestamp": "2025-10-01 03:28:40.069621", + "step": 6158, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.109705", + "step": 6158, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019211843609809875, + "timestamp": "2025-10-01 03:28:40.119067", + "step": 6159, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.157789", + "step": 6159, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007190780597738922, + "timestamp": "2025-10-01 03:28:40.188503", + "step": 6160, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.229668", + "step": 6160, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009963945485651493, + "timestamp": "2025-10-01 03:28:40.238761", + "step": 6161, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:40.278427", + "step": 6161, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003111664205789566, + "timestamp": "2025-10-01 03:28:40.282600", + "step": 6162, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:40.317496", + "step": 6162, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011331161949783564, + "timestamp": "2025-10-01 03:28:40.327540", + "step": 6163, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.364773", + "step": 6163, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003577703610062599, + "timestamp": "2025-10-01 03:28:40.393166", + "step": 6164, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.441045", + "step": 6164, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006203929660841823, + "timestamp": "2025-10-01 03:28:40.448432", + "step": 6165, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:40.485288", + "step": 6165, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000860570406075567, + "timestamp": "2025-10-01 03:28:40.492872", + "step": 6166, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.529889", + "step": 6166, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012501238379627466, + "timestamp": "2025-10-01 03:28:40.537916", + "step": 6167, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:40.578201", + "step": 6167, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006843901355750859, + "timestamp": "2025-10-01 03:28:40.609364", + "step": 6168, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.655853", + "step": 6168, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005907280137762427, + "timestamp": "2025-10-01 03:28:40.661422", + "step": 6169, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:40.699450", + "step": 6169, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000590460083913058, + "timestamp": "2025-10-01 03:28:40.703400", + "step": 6170, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.737691", + "step": 6170, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028089438565075397, + "timestamp": "2025-10-01 03:28:40.741485", + "step": 6171, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:40.773374", + "step": 6171, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004699463956058025, + "timestamp": "2025-10-01 03:28:40.798523", + "step": 6172, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.833201", + "step": 6172, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010081615764647722, + "timestamp": "2025-10-01 03:28:40.841666", + "step": 6173, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.883669", + "step": 6173, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003726335125975311, + "timestamp": "2025-10-01 03:28:40.887149", + "step": 6174, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:40.927082", + "step": 6174, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002766792313195765, + "timestamp": "2025-10-01 03:28:40.937875", + "step": 6175, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:40.978500", + "step": 6175, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002899132377933711, + "timestamp": "2025-10-01 03:28:41.013812", + "step": 6176, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.070107", + "step": 6176, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037613154854625463, + "timestamp": "2025-10-01 03:28:41.081746", + "step": 6177, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.124025", + "step": 6177, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023554703511763364, + "timestamp": "2025-10-01 03:28:41.138091", + "step": 6178, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.183410", + "step": 6178, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003669647849164903, + "timestamp": "2025-10-01 03:28:41.197356", + "step": 6179, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:41.240175", + "step": 6179, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006063595646992326, + "timestamp": "2025-10-01 03:28:41.274180", + "step": 6180, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:41.325129", + "step": 6180, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034185119438916445, + "timestamp": "2025-10-01 03:28:41.337866", + "step": 6181, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.376979", + "step": 6181, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030060069635510445, + "timestamp": "2025-10-01 03:28:41.383591", + "step": 6182, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.420514", + "step": 6182, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009522869368083775, + "timestamp": "2025-10-01 03:28:41.427493", + "step": 6183, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:41.465288", + "step": 6183, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022301646822597831, + "timestamp": "2025-10-01 03:28:41.495041", + "step": 6184, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:41.533891", + "step": 6184, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004391771217342466, + "timestamp": "2025-10-01 03:28:41.545283", + "step": 6185, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.587002", + "step": 6185, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003043701872229576, + "timestamp": "2025-10-01 03:28:41.597116", + "step": 6186, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.635441", + "step": 6186, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010005327640101314, + "timestamp": "2025-10-01 03:28:41.645963", + "step": 6187, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:41.684496", + "step": 6187, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019258175045251846, + "timestamp": "2025-10-01 03:28:41.711835", + "step": 6188, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.756662", + "step": 6188, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001553467009216547, + "timestamp": "2025-10-01 03:28:41.764358", + "step": 6189, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.800925", + "step": 6189, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047045029350556433, + "timestamp": "2025-10-01 03:28:41.808281", + "step": 6190, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:41.845885", + "step": 6190, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004550444718915969, + "timestamp": "2025-10-01 03:28:41.852404", + "step": 6191, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.886676", + "step": 6191, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017887834110297263, + "timestamp": "2025-10-01 03:28:41.916636", + "step": 6192, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.951805", + "step": 6192, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017184024676680565, + "timestamp": "2025-10-01 03:28:41.958538", + "step": 6193, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:41.998028", + "step": 6193, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024018989875912666, + "timestamp": "2025-10-01 03:28:42.006852", + "step": 6194, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:42.058328", + "step": 6194, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0498906746506691, + "timestamp": "2025-10-01 03:28:42.067434", + "step": 6195, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.103859", + "step": 6195, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039209864917211235, + "timestamp": "2025-10-01 03:28:42.133362", + "step": 6196, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.182639", + "step": 6196, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012884738389402628, + "timestamp": "2025-10-01 03:28:42.192144", + "step": 6197, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.236968", + "step": 6197, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017497526481747627, + "timestamp": "2025-10-01 03:28:42.245861", + "step": 6198, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.283977", + "step": 6198, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007081832736730576, + "timestamp": "2025-10-01 03:28:42.293364", + "step": 6199, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:42.330852", + "step": 6199, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048519964911974967, + "timestamp": "2025-10-01 03:28:42.367336", + "step": 6200, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.411324", + "step": 6200, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015005349414423108, + "timestamp": "2025-10-01 03:28:42.426904", + "step": 6201, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.471669", + "step": 6201, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008771985885687172, + "timestamp": "2025-10-01 03:28:42.475908", + "step": 6202, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.511587", + "step": 6202, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019857918843626976, + "timestamp": "2025-10-01 03:28:42.524755", + "step": 6203, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.566584", + "step": 6203, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013166065036784858, + "timestamp": "2025-10-01 03:28:42.603317", + "step": 6204, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.643934", + "step": 6204, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036412611370906234, + "timestamp": "2025-10-01 03:28:42.656363", + "step": 6205, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:42.697782", + "step": 6205, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040871926466934383, + "timestamp": "2025-10-01 03:28:42.710784", + "step": 6206, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.752545", + "step": 6206, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001189350223285146, + "timestamp": "2025-10-01 03:28:42.763945", + "step": 6207, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.809685", + "step": 6207, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016093224985525012, + "timestamp": "2025-10-01 03:28:42.843546", + "step": 6208, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.884405", + "step": 6208, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006038138526491821, + "timestamp": "2025-10-01 03:28:42.894637", + "step": 6209, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:42.928450", + "step": 6209, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007334313704632223, + "timestamp": "2025-10-01 03:28:42.936562", + "step": 6210, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:42.974986", + "step": 6210, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007567384745925665, + "timestamp": "2025-10-01 03:28:42.983595", + "step": 6211, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:43.020786", + "step": 6211, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028847664361819625, + "timestamp": "2025-10-01 03:28:43.052225", + "step": 6212, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.093831", + "step": 6212, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047453370643779635, + "timestamp": "2025-10-01 03:28:43.104757", + "step": 6213, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.144977", + "step": 6213, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008854395127855241, + "timestamp": "2025-10-01 03:28:43.153063", + "step": 6214, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.195036", + "step": 6214, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.051241617649793625, + "timestamp": "2025-10-01 03:28:43.202292", + "step": 6215, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:43.241806", + "step": 6215, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0205171350389719, + "timestamp": "2025-10-01 03:28:43.271180", + "step": 6216, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.313157", + "step": 6216, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008025976712815464, + "timestamp": "2025-10-01 03:28:43.320883", + "step": 6217, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.357360", + "step": 6217, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05323037505149841, + "timestamp": "2025-10-01 03:28:43.364762", + "step": 6218, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.397822", + "step": 6218, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031299698166549206, + "timestamp": "2025-10-01 03:28:43.401013", + "step": 6219, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.433125", + "step": 6219, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.029021883383393288, + "timestamp": "2025-10-01 03:28:43.461181", + "step": 6220, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.500468", + "step": 6220, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011236929567530751, + "timestamp": "2025-10-01 03:28:43.507442", + "step": 6221, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:43.550670", + "step": 6221, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013318363926373422, + "timestamp": "2025-10-01 03:28:43.559060", + "step": 6222, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.604990", + "step": 6222, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006636324105784297, + "timestamp": "2025-10-01 03:28:43.611752", + "step": 6223, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.647590", + "step": 6223, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045627582585439086, + "timestamp": "2025-10-01 03:28:43.679542", + "step": 6224, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:43.723600", + "step": 6224, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002741872100159526, + "timestamp": "2025-10-01 03:28:43.736806", + "step": 6225, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.783690", + "step": 6225, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033241018536500633, + "timestamp": "2025-10-01 03:28:43.797510", + "step": 6226, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.843747", + "step": 6226, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007514139288105071, + "timestamp": "2025-10-01 03:28:43.856536", + "step": 6227, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:43.898833", + "step": 6227, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031960621709004045, + "timestamp": "2025-10-01 03:28:43.932245", + "step": 6228, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:43.973446", + "step": 6228, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021327221766114235, + "timestamp": "2025-10-01 03:28:43.985758", + "step": 6229, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:44.029218", + "step": 6229, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006047034170478582, + "timestamp": "2025-10-01 03:28:44.040273", + "step": 6230, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:44.081858", + "step": 6230, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008478516829200089, + "timestamp": "2025-10-01 03:28:44.095075", + "step": 6231, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:44.139688", + "step": 6231, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030877560493536294, + "timestamp": "2025-10-01 03:28:44.174981", + "step": 6232, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:28:46.488376", + "step": 6232, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2141312.3372761738, + "timestamp": "2025-10-01 03:28:46.495469", + "step": 6232, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:46.542051", + "step": 6232, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030408846214413643, + "timestamp": "2025-10-01 03:28:46.549102", + "step": 6233, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:28:46.586207", + "step": 6233, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006257212953642011, + "timestamp": "2025-10-01 03:28:46.589545", + "step": 6234, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:46.623978", + "step": 6234, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019493652507662773, + "timestamp": "2025-10-01 03:28:46.634125", + "step": 6235, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:46.668168", + "step": 6235, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012479970464482903, + "timestamp": "2025-10-01 03:28:46.695655", + "step": 6236, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:46.736515", + "step": 6236, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017253078112844378, + "timestamp": "2025-10-01 03:28:46.748793", + "step": 6237, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:46.788912", + "step": 6237, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03656548261642456, + "timestamp": "2025-10-01 03:28:46.798335", + "step": 6238, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:46.837512", + "step": 6238, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030968504142947495, + "timestamp": "2025-10-01 03:28:46.849635", + "step": 6239, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:46.882262", + "step": 6239, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004365148488432169, + "timestamp": "2025-10-01 03:28:46.907557", + "step": 6240, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:46.960221", + "step": 6240, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029242911841720343, + "timestamp": "2025-10-01 03:28:46.974733", + "step": 6241, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.019743", + "step": 6241, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017100899713113904, + "timestamp": "2025-10-01 03:28:47.034571", + "step": 6242, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:47.078666", + "step": 6242, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001783497747965157, + "timestamp": "2025-10-01 03:28:47.091886", + "step": 6243, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.134881", + "step": 6243, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011544720036908984, + "timestamp": "2025-10-01 03:28:47.168784", + "step": 6244, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.208756", + "step": 6244, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007117727305740118, + "timestamp": "2025-10-01 03:28:47.222394", + "step": 6245, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.267336", + "step": 6245, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005851133028045297, + "timestamp": "2025-10-01 03:28:47.284187", + "step": 6246, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.330289", + "step": 6246, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022854852431919426, + "timestamp": "2025-10-01 03:28:47.346687", + "step": 6247, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.397842", + "step": 6247, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024811848998069763, + "timestamp": "2025-10-01 03:28:47.432698", + "step": 6248, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.484937", + "step": 6248, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012012106599286199, + "timestamp": "2025-10-01 03:28:47.495983", + "step": 6249, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.539073", + "step": 6249, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002529285615310073, + "timestamp": "2025-10-01 03:28:47.549110", + "step": 6250, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.588249", + "step": 6250, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001566413207910955, + "timestamp": "2025-10-01 03:28:47.591879", + "step": 6251, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:47.623924", + "step": 6251, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03552720695734024, + "timestamp": "2025-10-01 03:28:47.655943", + "step": 6252, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.696681", + "step": 6252, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005514866788871586, + "timestamp": "2025-10-01 03:28:47.709164", + "step": 6253, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.749546", + "step": 6253, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013725782744586468, + "timestamp": "2025-10-01 03:28:47.759692", + "step": 6254, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.797253", + "step": 6254, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001352589693851769, + "timestamp": "2025-10-01 03:28:47.806807", + "step": 6255, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.850217", + "step": 6255, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002919236139860004, + "timestamp": "2025-10-01 03:28:47.886733", + "step": 6256, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:47.931445", + "step": 6256, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011325044557452202, + "timestamp": "2025-10-01 03:28:47.945400", + "step": 6257, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:47.987250", + "step": 6257, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008187899366021156, + "timestamp": "2025-10-01 03:28:47.995305", + "step": 6258, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.034921", + "step": 6258, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019677176605910063, + "timestamp": "2025-10-01 03:28:48.046484", + "step": 6259, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.086393", + "step": 6259, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043620276846922934, + "timestamp": "2025-10-01 03:28:48.118607", + "step": 6260, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.154572", + "step": 6260, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036463068681769073, + "timestamp": "2025-10-01 03:28:48.161527", + "step": 6261, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.200263", + "step": 6261, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005335421301424503, + "timestamp": "2025-10-01 03:28:48.203032", + "step": 6262, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.240143", + "step": 6262, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000263995083514601, + "timestamp": "2025-10-01 03:28:48.248053", + "step": 6263, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.286495", + "step": 6263, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001282940269447863, + "timestamp": "2025-10-01 03:28:48.311535", + "step": 6264, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.350371", + "step": 6264, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005923701100982726, + "timestamp": "2025-10-01 03:28:48.359891", + "step": 6265, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.395872", + "step": 6265, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008318753680214286, + "timestamp": "2025-10-01 03:28:48.404051", + "step": 6266, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.440710", + "step": 6266, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007077197660692036, + "timestamp": "2025-10-01 03:28:48.448203", + "step": 6267, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:48.486865", + "step": 6267, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038314482662826777, + "timestamp": "2025-10-01 03:28:48.517525", + "step": 6268, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.554535", + "step": 6268, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006689018569886684, + "timestamp": "2025-10-01 03:28:48.563466", + "step": 6269, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.599252", + "step": 6269, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010459410259500146, + "timestamp": "2025-10-01 03:28:48.608631", + "step": 6270, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.656293", + "step": 6270, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016595299821346998, + "timestamp": "2025-10-01 03:28:48.661425", + "step": 6271, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.705022", + "step": 6271, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022571739100385457, + "timestamp": "2025-10-01 03:28:48.731041", + "step": 6272, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.776951", + "step": 6272, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029518650262616575, + "timestamp": "2025-10-01 03:28:48.791213", + "step": 6273, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:48.835990", + "step": 6273, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003221795428544283, + "timestamp": "2025-10-01 03:28:48.851594", + "step": 6274, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.895634", + "step": 6274, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007457779720425606, + "timestamp": "2025-10-01 03:28:48.910387", + "step": 6275, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:48.956814", + "step": 6275, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004772479005623609, + "timestamp": "2025-10-01 03:28:48.990431", + "step": 6276, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.029889", + "step": 6276, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018981509492732584, + "timestamp": "2025-10-01 03:28:49.040516", + "step": 6277, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:49.082283", + "step": 6277, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000636169861536473, + "timestamp": "2025-10-01 03:28:49.094269", + "step": 6278, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.135845", + "step": 6278, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022367722704075277, + "timestamp": "2025-10-01 03:28:49.144354", + "step": 6279, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.195877", + "step": 6279, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018477825506124645, + "timestamp": "2025-10-01 03:28:49.227306", + "step": 6280, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:49.265798", + "step": 6280, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027848121244460344, + "timestamp": "2025-10-01 03:28:49.274257", + "step": 6281, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.315072", + "step": 6281, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015952285612002015, + "timestamp": "2025-10-01 03:28:49.326505", + "step": 6282, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.376280", + "step": 6282, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021900555293541402, + "timestamp": "2025-10-01 03:28:49.385430", + "step": 6283, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.425878", + "step": 6283, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004424682992976159, + "timestamp": "2025-10-01 03:28:49.456031", + "step": 6284, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:49.495791", + "step": 6284, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037339908885769546, + "timestamp": "2025-10-01 03:28:49.508776", + "step": 6285, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.559888", + "step": 6285, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007873387658037245, + "timestamp": "2025-10-01 03:28:49.566184", + "step": 6286, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.608413", + "step": 6286, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008832631283439696, + "timestamp": "2025-10-01 03:28:49.614023", + "step": 6287, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.665473", + "step": 6287, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011449972167611122, + "timestamp": "2025-10-01 03:28:49.694740", + "step": 6288, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.739559", + "step": 6288, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006613481091335416, + "timestamp": "2025-10-01 03:28:49.747578", + "step": 6289, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.789910", + "step": 6289, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038667491171509027, + "timestamp": "2025-10-01 03:28:49.797607", + "step": 6290, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.835852", + "step": 6290, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001563411788083613, + "timestamp": "2025-10-01 03:28:49.843652", + "step": 6291, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.882570", + "step": 6291, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009482089662924409, + "timestamp": "2025-10-01 03:28:49.911831", + "step": 6292, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:49.961712", + "step": 6292, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006885853363201022, + "timestamp": "2025-10-01 03:28:49.972185", + "step": 6293, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.024857", + "step": 6293, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030160616734065115, + "timestamp": "2025-10-01 03:28:50.036776", + "step": 6294, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:50.089804", + "step": 6294, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005865063983947039, + "timestamp": "2025-10-01 03:28:50.100613", + "step": 6295, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.141933", + "step": 6295, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000285749469185248, + "timestamp": "2025-10-01 03:28:50.174588", + "step": 6296, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.233172", + "step": 6296, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005494563840329647, + "timestamp": "2025-10-01 03:28:50.247272", + "step": 6297, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.288883", + "step": 6297, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02887454628944397, + "timestamp": "2025-10-01 03:28:50.299922", + "step": 6298, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.346448", + "step": 6298, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024210186675190926, + "timestamp": "2025-10-01 03:28:50.358693", + "step": 6299, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.415960", + "step": 6299, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017735747387632728, + "timestamp": "2025-10-01 03:28:50.448180", + "step": 6300, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.500137", + "step": 6300, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004896693280898035, + "timestamp": "2025-10-01 03:28:50.509991", + "step": 6301, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:50.565264", + "step": 6301, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009363715536892414, + "timestamp": "2025-10-01 03:28:50.577839", + "step": 6302, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.632163", + "step": 6302, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003397892287466675, + "timestamp": "2025-10-01 03:28:50.641272", + "step": 6303, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.682493", + "step": 6303, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018337887013331056, + "timestamp": "2025-10-01 03:28:50.715289", + "step": 6304, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.753822", + "step": 6304, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003748177783563733, + "timestamp": "2025-10-01 03:28:50.761893", + "step": 6305, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.808439", + "step": 6305, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011840417282655835, + "timestamp": "2025-10-01 03:28:50.817599", + "step": 6306, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.856143", + "step": 6306, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005033842171542346, + "timestamp": "2025-10-01 03:28:50.862515", + "step": 6307, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.901881", + "step": 6307, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002999019343405962, + "timestamp": "2025-10-01 03:28:50.928351", + "step": 6308, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:50.970667", + "step": 6308, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003405078314244747, + "timestamp": "2025-10-01 03:28:50.974741", + "step": 6309, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.014992", + "step": 6309, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.76132253324613e-05, + "timestamp": "2025-10-01 03:28:51.021291", + "step": 6310, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.061704", + "step": 6310, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011145021999254823, + "timestamp": "2025-10-01 03:28:51.067334", + "step": 6311, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.103897", + "step": 6311, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024199087638407946, + "timestamp": "2025-10-01 03:28:51.130341", + "step": 6312, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:51.166891", + "step": 6312, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007104497053660452, + "timestamp": "2025-10-01 03:28:51.171654", + "step": 6313, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.209360", + "step": 6313, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.025592071935534477, + "timestamp": "2025-10-01 03:28:51.215416", + "step": 6314, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.261263", + "step": 6314, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.040587954223155975, + "timestamp": "2025-10-01 03:28:51.271089", + "step": 6315, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.313831", + "step": 6315, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005072992644272745, + "timestamp": "2025-10-01 03:28:51.340444", + "step": 6316, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.384188", + "step": 6316, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00506155239418149, + "timestamp": "2025-10-01 03:28:51.389955", + "step": 6317, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:51.428421", + "step": 6317, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006555708241648972, + "timestamp": "2025-10-01 03:28:51.435530", + "step": 6318, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:51.487534", + "step": 6318, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019541291112545878, + "timestamp": "2025-10-01 03:28:51.495946", + "step": 6319, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.537782", + "step": 6319, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047197911771945655, + "timestamp": "2025-10-01 03:28:51.568537", + "step": 6320, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.616714", + "step": 6320, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04614593833684921, + "timestamp": "2025-10-01 03:28:51.630392", + "step": 6321, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.679143", + "step": 6321, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013314314419403672, + "timestamp": "2025-10-01 03:28:51.693052", + "step": 6322, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.740127", + "step": 6322, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001248229295015335, + "timestamp": "2025-10-01 03:28:51.751662", + "step": 6323, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.795766", + "step": 6323, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007300112629309297, + "timestamp": "2025-10-01 03:28:51.827443", + "step": 6324, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.875703", + "step": 6324, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006613673758693039, + "timestamp": "2025-10-01 03:28:51.887265", + "step": 6325, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.929590", + "step": 6325, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000981594668701291, + "timestamp": "2025-10-01 03:28:51.941132", + "step": 6326, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:51.989023", + "step": 6326, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036401450634002686, + "timestamp": "2025-10-01 03:28:51.995371", + "step": 6327, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:52.033496", + "step": 6327, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016630595549941063, + "timestamp": "2025-10-01 03:28:52.060478", + "step": 6328, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.100403", + "step": 6328, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003551699628587812, + "timestamp": "2025-10-01 03:28:52.108880", + "step": 6329, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.152692", + "step": 6329, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.10556633770465851, + "timestamp": "2025-10-01 03:28:52.164566", + "step": 6330, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.207782", + "step": 6330, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034466441720724106, + "timestamp": "2025-10-01 03:28:52.216906", + "step": 6331, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.254527", + "step": 6331, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.800264524528757e-05, + "timestamp": "2025-10-01 03:28:52.281246", + "step": 6332, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:52.322898", + "step": 6332, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011426910758018494, + "timestamp": "2025-10-01 03:28:52.327557", + "step": 6333, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.365993", + "step": 6333, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018349306657910347, + "timestamp": "2025-10-01 03:28:52.374392", + "step": 6334, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.419500", + "step": 6334, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001419191830791533, + "timestamp": "2025-10-01 03:28:52.431258", + "step": 6335, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.473385", + "step": 6335, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023910794407129288, + "timestamp": "2025-10-01 03:28:52.504397", + "step": 6336, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.542262", + "step": 6336, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007283512037247419, + "timestamp": "2025-10-01 03:28:52.548072", + "step": 6337, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:52.588537", + "step": 6337, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.057677775621414185, + "timestamp": "2025-10-01 03:28:52.594481", + "step": 6338, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.631781", + "step": 6338, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013232912169769406, + "timestamp": "2025-10-01 03:28:52.637738", + "step": 6339, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.676097", + "step": 6339, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000559836917091161, + "timestamp": "2025-10-01 03:28:52.703397", + "step": 6340, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.754410", + "step": 6340, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020880343799944967, + "timestamp": "2025-10-01 03:28:52.759477", + "step": 6341, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.798581", + "step": 6341, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007801289902999997, + "timestamp": "2025-10-01 03:28:52.809339", + "step": 6342, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.853255", + "step": 6342, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004247410222887993, + "timestamp": "2025-10-01 03:28:52.863822", + "step": 6343, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:52.908013", + "step": 6343, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012007947079837322, + "timestamp": "2025-10-01 03:28:52.941253", + "step": 6344, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:52.986326", + "step": 6344, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003767525195144117, + "timestamp": "2025-10-01 03:28:52.999450", + "step": 6345, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.040390", + "step": 6345, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002397851785644889, + "timestamp": "2025-10-01 03:28:53.052261", + "step": 6346, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.101581", + "step": 6346, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008215055800974369, + "timestamp": "2025-10-01 03:28:53.112406", + "step": 6347, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:53.154697", + "step": 6347, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007172293844632804, + "timestamp": "2025-10-01 03:28:53.189379", + "step": 6348, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.238298", + "step": 6348, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003837987605948001, + "timestamp": "2025-10-01 03:28:53.250905", + "step": 6349, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:53.285778", + "step": 6349, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007598857628181577, + "timestamp": "2025-10-01 03:28:53.297951", + "step": 6350, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.340300", + "step": 6350, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003302047844044864, + "timestamp": "2025-10-01 03:28:53.351113", + "step": 6351, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.403056", + "step": 6351, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022508377151098102, + "timestamp": "2025-10-01 03:28:53.428313", + "step": 6352, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.479009", + "step": 6352, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001243461505509913, + "timestamp": "2025-10-01 03:28:53.492548", + "step": 6353, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.536122", + "step": 6353, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007870771922171116, + "timestamp": "2025-10-01 03:28:53.548934", + "step": 6354, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.593450", + "step": 6354, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047562873805873096, + "timestamp": "2025-10-01 03:28:53.610039", + "step": 6355, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:53.666990", + "step": 6355, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002293073630426079, + "timestamp": "2025-10-01 03:28:53.699455", + "step": 6356, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.753444", + "step": 6356, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006351800402626395, + "timestamp": "2025-10-01 03:28:53.762635", + "step": 6357, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.809735", + "step": 6357, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022537035692948848, + "timestamp": "2025-10-01 03:28:53.820598", + "step": 6358, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:53.887674", + "step": 6358, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003723886620718986, + "timestamp": "2025-10-01 03:28:53.896765", + "step": 6359, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:53.961526", + "step": 6359, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011128704063594341, + "timestamp": "2025-10-01 03:28:53.994484", + "step": 6360, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.051747", + "step": 6360, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.869025234365836e-05, + "timestamp": "2025-10-01 03:28:54.063748", + "step": 6361, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.111737", + "step": 6361, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03244294971227646, + "timestamp": "2025-10-01 03:28:54.124841", + "step": 6362, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.174985", + "step": 6362, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013976174406707287, + "timestamp": "2025-10-01 03:28:54.185844", + "step": 6363, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:54.233286", + "step": 6363, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013867336325347424, + "timestamp": "2025-10-01 03:28:54.272144", + "step": 6364, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.315065", + "step": 6364, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004368195659480989, + "timestamp": "2025-10-01 03:28:54.325297", + "step": 6365, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.373388", + "step": 6365, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008207297883927822, + "timestamp": "2025-10-01 03:28:54.382786", + "step": 6366, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.424943", + "step": 6366, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016069255070760846, + "timestamp": "2025-10-01 03:28:54.433752", + "step": 6367, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.473825", + "step": 6367, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000919119396712631, + "timestamp": "2025-10-01 03:28:54.507095", + "step": 6368, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.564024", + "step": 6368, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019050850823987275, + "timestamp": "2025-10-01 03:28:54.572428", + "step": 6369, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:54.614729", + "step": 6369, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015561490727122873, + "timestamp": "2025-10-01 03:28:54.625331", + "step": 6370, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.665587", + "step": 6370, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002970076457131654, + "timestamp": "2025-10-01 03:28:54.676823", + "step": 6371, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.719707", + "step": 6371, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030752053135074675, + "timestamp": "2025-10-01 03:28:54.750791", + "step": 6372, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.790932", + "step": 6372, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002079000696539879, + "timestamp": "2025-10-01 03:28:54.799445", + "step": 6373, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.850297", + "step": 6373, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005033536581322551, + "timestamp": "2025-10-01 03:28:54.853319", + "step": 6374, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.894776", + "step": 6374, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005735758226364851, + "timestamp": "2025-10-01 03:28:54.909902", + "step": 6375, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:54.971363", + "step": 6375, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010302412556484342, + "timestamp": "2025-10-01 03:28:54.997188", + "step": 6376, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:55.043020", + "step": 6376, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011756221065297723, + "timestamp": "2025-10-01 03:28:55.061341", + "step": 6377, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:55.095330", + "step": 6377, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009831880452111363, + "timestamp": "2025-10-01 03:28:55.109911", + "step": 6378, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:55.155889", + "step": 6378, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003762459964491427, + "timestamp": "2025-10-01 03:28:55.170668", + "step": 6379, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:55.218247", + "step": 6379, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023045160342007875, + "timestamp": "2025-10-01 03:28:55.255120", + "step": 6380, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:55.307724", + "step": 6380, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008488004095852375, + "timestamp": "2025-10-01 03:28:55.328541", + "step": 6381, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:55.373210", + "step": 6381, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007675032829865813, + "timestamp": "2025-10-01 03:28:55.386529", + "step": 6382, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:55.431568", + "step": 6382, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008526192978024483, + "timestamp": "2025-10-01 03:28:55.444273", + "step": 6383, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:55.489383", + "step": 6383, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003477979335002601, + "timestamp": "2025-10-01 03:28:55.527366", + "step": 6384, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:28:58.351129", + "step": 6384, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2299927.8364094137, + "timestamp": "2025-10-01 03:28:58.357646", + "step": 6384, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.395854", + "step": 6384, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005578836426138878, + "timestamp": "2025-10-01 03:28:58.409482", + "step": 6385, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.455461", + "step": 6385, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019137021154165268, + "timestamp": "2025-10-01 03:28:58.459239", + "step": 6386, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.497522", + "step": 6386, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002000471286009997, + "timestamp": "2025-10-01 03:28:58.508187", + "step": 6387, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.555576", + "step": 6387, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002101100835716352, + "timestamp": "2025-10-01 03:28:58.581440", + "step": 6388, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.623550", + "step": 6388, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006774211069568992, + "timestamp": "2025-10-01 03:28:58.642805", + "step": 6389, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.676601", + "step": 6389, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024320294323842973, + "timestamp": "2025-10-01 03:28:58.695582", + "step": 6390, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.732834", + "step": 6390, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01724609173834324, + "timestamp": "2025-10-01 03:28:58.735918", + "step": 6391, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.783352", + "step": 6391, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006392185459844768, + "timestamp": "2025-10-01 03:28:58.812153", + "step": 6392, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.847616", + "step": 6392, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006124023348093033, + "timestamp": "2025-10-01 03:28:58.854374", + "step": 6393, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.891397", + "step": 6393, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040496813016943634, + "timestamp": "2025-10-01 03:28:58.896355", + "step": 6394, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:58.939933", + "step": 6394, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010214002104476094, + "timestamp": "2025-10-01 03:28:58.949282", + "step": 6395, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:58.985533", + "step": 6395, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020655505068134516, + "timestamp": "2025-10-01 03:28:59.016580", + "step": 6396, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.055910", + "step": 6396, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029759358149021864, + "timestamp": "2025-10-01 03:28:59.063424", + "step": 6397, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.108346", + "step": 6397, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007765110931359231, + "timestamp": "2025-10-01 03:28:59.121366", + "step": 6398, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.170877", + "step": 6398, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009212939999997616, + "timestamp": "2025-10-01 03:28:59.178619", + "step": 6399, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.225329", + "step": 6399, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01982162706553936, + "timestamp": "2025-10-01 03:28:59.259683", + "step": 6400, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:59.311114", + "step": 6400, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047144535346888006, + "timestamp": "2025-10-01 03:28:59.321841", + "step": 6401, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.362195", + "step": 6401, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004240524955093861, + "timestamp": "2025-10-01 03:28:59.369174", + "step": 6402, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.402369", + "step": 6402, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01039605587720871, + "timestamp": "2025-10-01 03:28:59.409843", + "step": 6403, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:28:59.451852", + "step": 6403, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019638585217762738, + "timestamp": "2025-10-01 03:28:59.482257", + "step": 6404, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.523726", + "step": 6404, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00512290745973587, + "timestamp": "2025-10-01 03:28:59.531411", + "step": 6405, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.571907", + "step": 6405, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015111123211681843, + "timestamp": "2025-10-01 03:28:59.579214", + "step": 6406, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.619733", + "step": 6406, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035311750252731144, + "timestamp": "2025-10-01 03:28:59.623224", + "step": 6407, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.663957", + "step": 6407, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000806894211564213, + "timestamp": "2025-10-01 03:28:59.692760", + "step": 6408, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.746447", + "step": 6408, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003590658598113805, + "timestamp": "2025-10-01 03:28:59.759840", + "step": 6409, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.793676", + "step": 6409, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024633979774080217, + "timestamp": "2025-10-01 03:28:59.807579", + "step": 6410, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:28:59.857159", + "step": 6410, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003144457587040961, + "timestamp": "2025-10-01 03:28:59.873198", + "step": 6411, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:28:59.918833", + "step": 6411, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001585730497026816, + "timestamp": "2025-10-01 03:28:59.951394", + "step": 6412, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:28:59.986114", + "step": 6412, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004180940450169146, + "timestamp": "2025-10-01 03:28:59.990201", + "step": 6413, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.022563", + "step": 6413, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009167595766484737, + "timestamp": "2025-10-01 03:29:00.033063", + "step": 6414, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.066598", + "step": 6414, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001406870869686827, + "timestamp": "2025-10-01 03:29:00.076851", + "step": 6415, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.119909", + "step": 6415, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05065840110182762, + "timestamp": "2025-10-01 03:29:00.152284", + "step": 6416, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.194706", + "step": 6416, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035984799615107477, + "timestamp": "2025-10-01 03:29:00.202862", + "step": 6417, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.249844", + "step": 6417, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034828195930458605, + "timestamp": "2025-10-01 03:29:00.256134", + "step": 6418, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.291845", + "step": 6418, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026362081989645958, + "timestamp": "2025-10-01 03:29:00.296750", + "step": 6419, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.332523", + "step": 6419, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003841598518192768, + "timestamp": "2025-10-01 03:29:00.358956", + "step": 6420, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.395258", + "step": 6420, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017237431893590838, + "timestamp": "2025-10-01 03:29:00.402213", + "step": 6421, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:00.440576", + "step": 6421, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002615362638607621, + "timestamp": "2025-10-01 03:29:00.444154", + "step": 6422, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.480033", + "step": 6422, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016311315121129155, + "timestamp": "2025-10-01 03:29:00.491754", + "step": 6423, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.534257", + "step": 6423, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04008765146136284, + "timestamp": "2025-10-01 03:29:00.558803", + "step": 6424, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.600612", + "step": 6424, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008744248189032078, + "timestamp": "2025-10-01 03:29:00.607511", + "step": 6425, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:00.650042", + "step": 6425, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030510316719301045, + "timestamp": "2025-10-01 03:29:00.659636", + "step": 6426, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.699823", + "step": 6426, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006285672425292432, + "timestamp": "2025-10-01 03:29:00.706451", + "step": 6427, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.742570", + "step": 6427, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004495643370319158, + "timestamp": "2025-10-01 03:29:00.767523", + "step": 6428, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:00.803455", + "step": 6428, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020525876607280225, + "timestamp": "2025-10-01 03:29:00.810872", + "step": 6429, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.853840", + "step": 6429, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002672094153240323, + "timestamp": "2025-10-01 03:29:00.856701", + "step": 6430, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.893002", + "step": 6430, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.939695883076638e-05, + "timestamp": "2025-10-01 03:29:00.898233", + "step": 6431, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:00.932902", + "step": 6431, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002275678562000394, + "timestamp": "2025-10-01 03:29:00.963174", + "step": 6432, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:01.009506", + "step": 6432, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025284907314926386, + "timestamp": "2025-10-01 03:29:01.022927", + "step": 6433, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.067041", + "step": 6433, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022884199861437082, + "timestamp": "2025-10-01 03:29:01.081077", + "step": 6434, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.137816", + "step": 6434, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003783514257520437, + "timestamp": "2025-10-01 03:29:01.141447", + "step": 6435, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-10-01 03:29:01.192818", + "step": 6435, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005309915286488831, + "timestamp": "2025-10-01 03:29:01.229224", + "step": 6436, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:01.272264", + "step": 6436, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030620167031884193, + "timestamp": "2025-10-01 03:29:01.284290", + "step": 6437, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.328414", + "step": 6437, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015387388702947646, + "timestamp": "2025-10-01 03:29:01.344320", + "step": 6438, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.389500", + "step": 6438, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011818463914096355, + "timestamp": "2025-10-01 03:29:01.403861", + "step": 6439, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.447865", + "step": 6439, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032132724300026894, + "timestamp": "2025-10-01 03:29:01.483375", + "step": 6440, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.525952", + "step": 6440, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034858647268265486, + "timestamp": "2025-10-01 03:29:01.539445", + "step": 6441, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.583811", + "step": 6441, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002906879235524684, + "timestamp": "2025-10-01 03:29:01.598455", + "step": 6442, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.646542", + "step": 6442, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010997000150382519, + "timestamp": "2025-10-01 03:29:01.649785", + "step": 6443, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.697036", + "step": 6443, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01967480406165123, + "timestamp": "2025-10-01 03:29:01.722936", + "step": 6444, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:01.758029", + "step": 6444, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.032315973192453384, + "timestamp": "2025-10-01 03:29:01.762995", + "step": 6445, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:01.810258", + "step": 6445, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011465762509033084, + "timestamp": "2025-10-01 03:29:01.817310", + "step": 6446, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.854348", + "step": 6446, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01575501635670662, + "timestamp": "2025-10-01 03:29:01.868122", + "step": 6447, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.904009", + "step": 6447, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006529785459861159, + "timestamp": "2025-10-01 03:29:01.928052", + "step": 6448, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:01.980062", + "step": 6448, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003195781901013106, + "timestamp": "2025-10-01 03:29:01.997227", + "step": 6449, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:02.031846", + "step": 6449, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04281727224588394, + "timestamp": "2025-10-01 03:29:02.035974", + "step": 6450, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:02.083418", + "step": 6450, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008883344125933945, + "timestamp": "2025-10-01 03:29:02.089279", + "step": 6451, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.127241", + "step": 6451, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001140778185799718, + "timestamp": "2025-10-01 03:29:02.151791", + "step": 6452, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.190111", + "step": 6452, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000458420196082443, + "timestamp": "2025-10-01 03:29:02.194771", + "step": 6453, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.229195", + "step": 6453, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021822065114974976, + "timestamp": "2025-10-01 03:29:02.245189", + "step": 6454, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.288662", + "step": 6454, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011205396614968777, + "timestamp": "2025-10-01 03:29:02.307072", + "step": 6455, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.347156", + "step": 6455, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001184874097816646, + "timestamp": "2025-10-01 03:29:02.371990", + "step": 6456, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.420297", + "step": 6456, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003944109776057303, + "timestamp": "2025-10-01 03:29:02.431592", + "step": 6457, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:02.466752", + "step": 6457, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018186431378126144, + "timestamp": "2025-10-01 03:29:02.473272", + "step": 6458, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.505129", + "step": 6458, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037767086178064346, + "timestamp": "2025-10-01 03:29:02.510288", + "step": 6459, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:02.553615", + "step": 6459, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03515612334012985, + "timestamp": "2025-10-01 03:29:02.580294", + "step": 6460, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.655357", + "step": 6460, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0559275820851326, + "timestamp": "2025-10-01 03:29:02.660437", + "step": 6461, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.696227", + "step": 6461, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03151208534836769, + "timestamp": "2025-10-01 03:29:02.706581", + "step": 6462, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.751776", + "step": 6462, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01227272767573595, + "timestamp": "2025-10-01 03:29:02.757353", + "step": 6463, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.801867", + "step": 6463, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030822870321571827, + "timestamp": "2025-10-01 03:29:02.828311", + "step": 6464, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.872181", + "step": 6464, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024301744997501373, + "timestamp": "2025-10-01 03:29:02.875375", + "step": 6465, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:02.917939", + "step": 6465, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006559452740475535, + "timestamp": "2025-10-01 03:29:02.929202", + "step": 6466, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:02.979495", + "step": 6466, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014471141621470451, + "timestamp": "2025-10-01 03:29:02.991340", + "step": 6467, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.033292", + "step": 6467, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002082330611301586, + "timestamp": "2025-10-01 03:29:03.058996", + "step": 6468, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.096293", + "step": 6468, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005808802787214518, + "timestamp": "2025-10-01 03:29:03.105151", + "step": 6469, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.142551", + "step": 6469, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007345914491452277, + "timestamp": "2025-10-01 03:29:03.148902", + "step": 6470, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:03.184843", + "step": 6470, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040983097278513014, + "timestamp": "2025-10-01 03:29:03.190499", + "step": 6471, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.226533", + "step": 6471, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006733497721143067, + "timestamp": "2025-10-01 03:29:03.256582", + "step": 6472, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:03.292701", + "step": 6472, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009030275978147984, + "timestamp": "2025-10-01 03:29:03.298552", + "step": 6473, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.335510", + "step": 6473, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004183008335530758, + "timestamp": "2025-10-01 03:29:03.345653", + "step": 6474, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.387340", + "step": 6474, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000693899521138519, + "timestamp": "2025-10-01 03:29:03.393836", + "step": 6475, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.428892", + "step": 6475, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002869273303076625, + "timestamp": "2025-10-01 03:29:03.456045", + "step": 6476, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.498515", + "step": 6476, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008604147587902844, + "timestamp": "2025-10-01 03:29:03.503667", + "step": 6477, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.541115", + "step": 6477, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046403956366702914, + "timestamp": "2025-10-01 03:29:03.544525", + "step": 6478, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.583883", + "step": 6478, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002189114922657609, + "timestamp": "2025-10-01 03:29:03.587356", + "step": 6479, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.624780", + "step": 6479, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032494161860086024, + "timestamp": "2025-10-01 03:29:03.649148", + "step": 6480, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.691875", + "step": 6480, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011473357444629073, + "timestamp": "2025-10-01 03:29:03.707080", + "step": 6481, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:03.750399", + "step": 6481, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004927809932269156, + "timestamp": "2025-10-01 03:29:03.755063", + "step": 6482, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.803215", + "step": 6482, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011216108687222004, + "timestamp": "2025-10-01 03:29:03.818911", + "step": 6483, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:03.863320", + "step": 6483, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001846486353315413, + "timestamp": "2025-10-01 03:29:03.892364", + "step": 6484, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.928418", + "step": 6484, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047048807027749717, + "timestamp": "2025-10-01 03:29:03.934738", + "step": 6485, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:03.969587", + "step": 6485, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006769784959033132, + "timestamp": "2025-10-01 03:29:03.973977", + "step": 6486, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.006982", + "step": 6486, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004444987862370908, + "timestamp": "2025-10-01 03:29:04.012539", + "step": 6487, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.051288", + "step": 6487, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002821916015818715, + "timestamp": "2025-10-01 03:29:04.077329", + "step": 6488, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.119458", + "step": 6488, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038089759182184935, + "timestamp": "2025-10-01 03:29:04.122643", + "step": 6489, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.154608", + "step": 6489, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005557037657126784, + "timestamp": "2025-10-01 03:29:04.166564", + "step": 6490, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.209913", + "step": 6490, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046221213415265083, + "timestamp": "2025-10-01 03:29:04.216002", + "step": 6491, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.264869", + "step": 6491, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008970754221081734, + "timestamp": "2025-10-01 03:29:04.293994", + "step": 6492, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.331237", + "step": 6492, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006356093799695373, + "timestamp": "2025-10-01 03:29:04.335877", + "step": 6493, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.369112", + "step": 6493, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021053898672107607, + "timestamp": "2025-10-01 03:29:04.372887", + "step": 6494, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:04.407477", + "step": 6494, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032960035605356097, + "timestamp": "2025-10-01 03:29:04.410567", + "step": 6495, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.444193", + "step": 6495, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024960482493042946, + "timestamp": "2025-10-01 03:29:04.469126", + "step": 6496, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.509089", + "step": 6496, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000565956870559603, + "timestamp": "2025-10-01 03:29:04.516316", + "step": 6497, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.558016", + "step": 6497, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003469069197308272, + "timestamp": "2025-10-01 03:29:04.564003", + "step": 6498, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:04.600803", + "step": 6498, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014437665231525898, + "timestamp": "2025-10-01 03:29:04.605326", + "step": 6499, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:04.639424", + "step": 6499, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03047267720103264, + "timestamp": "2025-10-01 03:29:04.665985", + "step": 6500, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 6500", + "timestamp": "2025-10-01 03:29:11.349465", + "step": 6500, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.382305", + "step": 6500, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024742368259467185, + "timestamp": "2025-10-01 03:29:11.385407", + "step": 6501, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.419601", + "step": 6501, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009701532311737537, + "timestamp": "2025-10-01 03:29:11.423968", + "step": 6502, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.456815", + "step": 6502, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002863029367290437, + "timestamp": "2025-10-01 03:29:11.461578", + "step": 6503, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.495364", + "step": 6503, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.029747838154435158, + "timestamp": "2025-10-01 03:29:11.522852", + "step": 6504, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.561398", + "step": 6504, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015543913468718529, + "timestamp": "2025-10-01 03:29:11.565040", + "step": 6505, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.598269", + "step": 6505, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004422831698320806, + "timestamp": "2025-10-01 03:29:11.602341", + "step": 6506, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:11.640330", + "step": 6506, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036545441253110766, + "timestamp": "2025-10-01 03:29:11.646778", + "step": 6507, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.686271", + "step": 6507, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002604164183139801, + "timestamp": "2025-10-01 03:29:11.712719", + "step": 6508, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.755274", + "step": 6508, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005315797752700746, + "timestamp": "2025-10-01 03:29:11.763245", + "step": 6509, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.798279", + "step": 6509, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016225669533014297, + "timestamp": "2025-10-01 03:29:11.801856", + "step": 6510, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.835773", + "step": 6510, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044330343371257186, + "timestamp": "2025-10-01 03:29:11.839351", + "step": 6511, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.874949", + "step": 6511, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003110405523329973, + "timestamp": "2025-10-01 03:29:11.899800", + "step": 6512, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.931655", + "step": 6512, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004143216065131128, + "timestamp": "2025-10-01 03:29:11.936215", + "step": 6513, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:11.969425", + "step": 6513, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009028271888382733, + "timestamp": "2025-10-01 03:29:11.973979", + "step": 6514, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.007823", + "step": 6514, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002043894724920392, + "timestamp": "2025-10-01 03:29:12.030852", + "step": 6515, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.070405", + "step": 6515, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010262444848194718, + "timestamp": "2025-10-01 03:29:12.094894", + "step": 6516, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.127886", + "step": 6516, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000561762077268213, + "timestamp": "2025-10-01 03:29:12.130138", + "step": 6517, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:12.162822", + "step": 6517, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012786233564838767, + "timestamp": "2025-10-01 03:29:12.165063", + "step": 6518, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.196990", + "step": 6518, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023576767125632614, + "timestamp": "2025-10-01 03:29:12.199758", + "step": 6519, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.231494", + "step": 6519, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006383405067026615, + "timestamp": "2025-10-01 03:29:12.255374", + "step": 6520, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.292195", + "step": 6520, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025260287802666426, + "timestamp": "2025-10-01 03:29:12.294556", + "step": 6521, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.326361", + "step": 6521, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015915753319859505, + "timestamp": "2025-10-01 03:29:12.329043", + "step": 6522, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.369507", + "step": 6522, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01868819259107113, + "timestamp": "2025-10-01 03:29:12.372078", + "step": 6523, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.405009", + "step": 6523, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012781519908457994, + "timestamp": "2025-10-01 03:29:12.428869", + "step": 6524, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.462308", + "step": 6524, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006444285390898585, + "timestamp": "2025-10-01 03:29:12.464809", + "step": 6525, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:12.497193", + "step": 6525, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019412051187828183, + "timestamp": "2025-10-01 03:29:12.500393", + "step": 6526, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.534451", + "step": 6526, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005429714801721275, + "timestamp": "2025-10-01 03:29:12.536946", + "step": 6527, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.568375", + "step": 6527, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008869375102221966, + "timestamp": "2025-10-01 03:29:12.593256", + "step": 6528, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.624669", + "step": 6528, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006169078988023102, + "timestamp": "2025-10-01 03:29:12.627770", + "step": 6529, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.664888", + "step": 6529, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.550673712510616e-05, + "timestamp": "2025-10-01 03:29:12.669104", + "step": 6530, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:12.702504", + "step": 6530, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002046569250524044, + "timestamp": "2025-10-01 03:29:12.706290", + "step": 6531, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.743117", + "step": 6531, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009919889271259308, + "timestamp": "2025-10-01 03:29:12.766968", + "step": 6532, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:12.799440", + "step": 6532, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001567811006680131, + "timestamp": "2025-10-01 03:29:12.801803", + "step": 6533, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.833293", + "step": 6533, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027550855884328485, + "timestamp": "2025-10-01 03:29:12.836140", + "step": 6534, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.869416", + "step": 6534, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039210665272548795, + "timestamp": "2025-10-01 03:29:12.871783", + "step": 6535, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:12.907484", + "step": 6535, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021609025134239346, + "timestamp": "2025-10-01 03:29:12.931230", + "step": 6536, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:29:15.628244", + "step": 6536, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2038007.6788672204, + "timestamp": "2025-10-01 03:29:15.630678", + "step": 6536, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:15.660914", + "step": 6536, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001521719095762819, + "timestamp": "2025-10-01 03:29:15.664236", + "step": 6537, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:15.700382", + "step": 6537, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035437254700809717, + "timestamp": "2025-10-01 03:29:15.705281", + "step": 6538, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:15.740777", + "step": 6538, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018870830535888672, + "timestamp": "2025-10-01 03:29:15.743606", + "step": 6539, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:15.780583", + "step": 6539, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009507990325801075, + "timestamp": "2025-10-01 03:29:15.805035", + "step": 6540, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:15.840881", + "step": 6540, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015276185877155513, + "timestamp": "2025-10-01 03:29:15.843499", + "step": 6541, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:15.876669", + "step": 6541, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018359601497650146, + "timestamp": "2025-10-01 03:29:15.879254", + "step": 6542, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:15.917882", + "step": 6542, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000332735653501004, + "timestamp": "2025-10-01 03:29:15.920286", + "step": 6543, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:15.955327", + "step": 6543, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012719864025712013, + "timestamp": "2025-10-01 03:29:15.979524", + "step": 6544, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:16.013099", + "step": 6544, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017372838919982314, + "timestamp": "2025-10-01 03:29:16.015862", + "step": 6545, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.058073", + "step": 6545, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021688290871679783, + "timestamp": "2025-10-01 03:29:16.060778", + "step": 6546, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:16.093720", + "step": 6546, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038758074515499175, + "timestamp": "2025-10-01 03:29:16.096257", + "step": 6547, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.129782", + "step": 6547, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005619368166662753, + "timestamp": "2025-10-01 03:29:16.154017", + "step": 6548, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.186586", + "step": 6548, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034873693948611617, + "timestamp": "2025-10-01 03:29:16.189437", + "step": 6549, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.222332", + "step": 6549, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006478457944467664, + "timestamp": "2025-10-01 03:29:16.225019", + "step": 6550, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.258424", + "step": 6550, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047429490950889885, + "timestamp": "2025-10-01 03:29:16.261008", + "step": 6551, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:16.296120", + "step": 6551, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025862993206828833, + "timestamp": "2025-10-01 03:29:16.320737", + "step": 6552, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.354174", + "step": 6552, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03014281764626503, + "timestamp": "2025-10-01 03:29:16.358966", + "step": 6553, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:16.394014", + "step": 6553, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003434852696955204, + "timestamp": "2025-10-01 03:29:16.396287", + "step": 6554, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.430435", + "step": 6554, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006137368269264698, + "timestamp": "2025-10-01 03:29:16.432740", + "step": 6555, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.466813", + "step": 6555, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016447466623503715, + "timestamp": "2025-10-01 03:29:16.491385", + "step": 6556, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.526681", + "step": 6556, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025361691950820386, + "timestamp": "2025-10-01 03:29:16.529205", + "step": 6557, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.562990", + "step": 6557, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029834735323674977, + "timestamp": "2025-10-01 03:29:16.565474", + "step": 6558, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.599246", + "step": 6558, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000770837243180722, + "timestamp": "2025-10-01 03:29:16.602176", + "step": 6559, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.637120", + "step": 6559, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.028089862316846848, + "timestamp": "2025-10-01 03:29:16.661328", + "step": 6560, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:16.698782", + "step": 6560, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002813624159898609, + "timestamp": "2025-10-01 03:29:16.702823", + "step": 6561, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:16.740146", + "step": 6561, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004452059220056981, + "timestamp": "2025-10-01 03:29:16.742762", + "step": 6562, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.775412", + "step": 6562, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015438857953995466, + "timestamp": "2025-10-01 03:29:16.778360", + "step": 6563, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.814679", + "step": 6563, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.045196808874607086, + "timestamp": "2025-10-01 03:29:16.846621", + "step": 6564, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:16.885883", + "step": 6564, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009378218674100935, + "timestamp": "2025-10-01 03:29:16.893861", + "step": 6565, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.929342", + "step": 6565, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002439219271764159, + "timestamp": "2025-10-01 03:29:16.935903", + "step": 6566, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:16.972925", + "step": 6566, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012037107953801751, + "timestamp": "2025-10-01 03:29:16.977027", + "step": 6567, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:17.009333", + "step": 6567, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015085596533026546, + "timestamp": "2025-10-01 03:29:17.044328", + "step": 6568, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:17.078837", + "step": 6568, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004460792988538742, + "timestamp": "2025-10-01 03:29:17.082582", + "step": 6569, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.116496", + "step": 6569, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019069749861955643, + "timestamp": "2025-10-01 03:29:17.119276", + "step": 6570, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.151074", + "step": 6570, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020023228717036545, + "timestamp": "2025-10-01 03:29:17.153745", + "step": 6571, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.191600", + "step": 6571, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.025940483435988426, + "timestamp": "2025-10-01 03:29:17.216847", + "step": 6572, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.253538", + "step": 6572, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041806368972174823, + "timestamp": "2025-10-01 03:29:17.256192", + "step": 6573, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.290133", + "step": 6573, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001563518599141389, + "timestamp": "2025-10-01 03:29:17.294209", + "step": 6574, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.330919", + "step": 6574, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00611058110371232, + "timestamp": "2025-10-01 03:29:17.334060", + "step": 6575, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.370547", + "step": 6575, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022560663637705147, + "timestamp": "2025-10-01 03:29:17.395126", + "step": 6576, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.427810", + "step": 6576, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005957442335784435, + "timestamp": "2025-10-01 03:29:17.431248", + "step": 6577, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.463718", + "step": 6577, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033105025067925453, + "timestamp": "2025-10-01 03:29:17.466703", + "step": 6578, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:17.501877", + "step": 6578, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003717223589774221, + "timestamp": "2025-10-01 03:29:17.505006", + "step": 6579, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.536855", + "step": 6579, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.070593710523099e-05, + "timestamp": "2025-10-01 03:29:17.561403", + "step": 6580, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.596585", + "step": 6580, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016959008644334972, + "timestamp": "2025-10-01 03:29:17.599414", + "step": 6581, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:17.631497", + "step": 6581, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007205902365967631, + "timestamp": "2025-10-01 03:29:17.634968", + "step": 6582, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:17.670110", + "step": 6582, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03192512318491936, + "timestamp": "2025-10-01 03:29:17.672773", + "step": 6583, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:17.711532", + "step": 6583, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030625067302025855, + "timestamp": "2025-10-01 03:29:17.735461", + "step": 6584, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:17.769048", + "step": 6584, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008404629188589752, + "timestamp": "2025-10-01 03:29:17.771659", + "step": 6585, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.814520", + "step": 6585, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023588910698890686, + "timestamp": "2025-10-01 03:29:17.817000", + "step": 6586, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.851415", + "step": 6586, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001216551405377686, + "timestamp": "2025-10-01 03:29:17.854907", + "step": 6587, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.891556", + "step": 6587, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030616085859946907, + "timestamp": "2025-10-01 03:29:17.915460", + "step": 6588, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:17.949666", + "step": 6588, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004335560370236635, + "timestamp": "2025-10-01 03:29:17.952176", + "step": 6589, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:17.983242", + "step": 6589, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0049544163048267365, + "timestamp": "2025-10-01 03:29:17.985565", + "step": 6590, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.017953", + "step": 6590, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002597290789708495, + "timestamp": "2025-10-01 03:29:18.020889", + "step": 6591, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.053850", + "step": 6591, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05708184093236923, + "timestamp": "2025-10-01 03:29:18.078188", + "step": 6592, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.120321", + "step": 6592, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003919387818314135, + "timestamp": "2025-10-01 03:29:18.122753", + "step": 6593, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:18.154616", + "step": 6593, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016680363565683365, + "timestamp": "2025-10-01 03:29:18.156875", + "step": 6594, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.187817", + "step": 6594, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013221116969361901, + "timestamp": "2025-10-01 03:29:18.190313", + "step": 6595, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.222854", + "step": 6595, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011008403962478042, + "timestamp": "2025-10-01 03:29:18.246837", + "step": 6596, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.288156", + "step": 6596, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013802693865727633, + "timestamp": "2025-10-01 03:29:18.290652", + "step": 6597, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:18.325103", + "step": 6597, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012058001942932606, + "timestamp": "2025-10-01 03:29:18.327344", + "step": 6598, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:18.364689", + "step": 6598, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017111106717493385, + "timestamp": "2025-10-01 03:29:18.366988", + "step": 6599, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.398180", + "step": 6599, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009995910804718733, + "timestamp": "2025-10-01 03:29:18.422164", + "step": 6600, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.455388", + "step": 6600, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013759228168055415, + "timestamp": "2025-10-01 03:29:18.458677", + "step": 6601, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.490220", + "step": 6601, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019299559062346816, + "timestamp": "2025-10-01 03:29:18.493499", + "step": 6602, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:18.525403", + "step": 6602, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028218126390129328, + "timestamp": "2025-10-01 03:29:18.528501", + "step": 6603, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.563609", + "step": 6603, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004139699158258736, + "timestamp": "2025-10-01 03:29:18.587945", + "step": 6604, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:18.623810", + "step": 6604, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004132052999921143, + "timestamp": "2025-10-01 03:29:18.626451", + "step": 6605, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:18.658226", + "step": 6605, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018809930188581347, + "timestamp": "2025-10-01 03:29:18.661216", + "step": 6606, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.694518", + "step": 6606, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002890937030315399, + "timestamp": "2025-10-01 03:29:18.697353", + "step": 6607, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.729645", + "step": 6607, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004142954130657017, + "timestamp": "2025-10-01 03:29:18.754109", + "step": 6608, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:18.790216", + "step": 6608, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001188612193800509, + "timestamp": "2025-10-01 03:29:18.793011", + "step": 6609, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.825040", + "step": 6609, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01108113769441843, + "timestamp": "2025-10-01 03:29:18.828053", + "step": 6610, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.864655", + "step": 6610, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013561986852437258, + "timestamp": "2025-10-01 03:29:18.867211", + "step": 6611, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.900417", + "step": 6611, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001338635920546949, + "timestamp": "2025-10-01 03:29:18.925073", + "step": 6612, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:18.960321", + "step": 6612, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007643382996320724, + "timestamp": "2025-10-01 03:29:18.963737", + "step": 6613, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:18.997133", + "step": 6613, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046435854164883494, + "timestamp": "2025-10-01 03:29:18.999847", + "step": 6614, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.033028", + "step": 6614, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000443335622549057, + "timestamp": "2025-10-01 03:29:19.035945", + "step": 6615, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.068116", + "step": 6615, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032084115082398057, + "timestamp": "2025-10-01 03:29:19.092692", + "step": 6616, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.127235", + "step": 6616, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031808551284484565, + "timestamp": "2025-10-01 03:29:19.130106", + "step": 6617, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.170294", + "step": 6617, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.370633440790698e-05, + "timestamp": "2025-10-01 03:29:19.173644", + "step": 6618, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.213340", + "step": 6618, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017803135560825467, + "timestamp": "2025-10-01 03:29:19.216752", + "step": 6619, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.251368", + "step": 6619, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.023012088611721992, + "timestamp": "2025-10-01 03:29:19.276125", + "step": 6620, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.311219", + "step": 6620, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028484532958827913, + "timestamp": "2025-10-01 03:29:19.314041", + "step": 6621, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.349664", + "step": 6621, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020919933915138245, + "timestamp": "2025-10-01 03:29:19.352938", + "step": 6622, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.386695", + "step": 6622, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020217211917042732, + "timestamp": "2025-10-01 03:29:19.392030", + "step": 6623, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:19.429562", + "step": 6623, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024033788940869272, + "timestamp": "2025-10-01 03:29:19.455104", + "step": 6624, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.495597", + "step": 6624, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023188711202237755, + "timestamp": "2025-10-01 03:29:19.500412", + "step": 6625, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.533076", + "step": 6625, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001963232411071658, + "timestamp": "2025-10-01 03:29:19.537062", + "step": 6626, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.576695", + "step": 6626, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005919639370404184, + "timestamp": "2025-10-01 03:29:19.581563", + "step": 6627, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.615520", + "step": 6627, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031850344967097044, + "timestamp": "2025-10-01 03:29:19.640642", + "step": 6628, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.677152", + "step": 6628, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001644484931603074, + "timestamp": "2025-10-01 03:29:19.681905", + "step": 6629, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.721959", + "step": 6629, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007898849435150623, + "timestamp": "2025-10-01 03:29:19.727293", + "step": 6630, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:19.761779", + "step": 6630, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019257741223555058, + "timestamp": "2025-10-01 03:29:19.764787", + "step": 6631, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.797550", + "step": 6631, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003969310317188501, + "timestamp": "2025-10-01 03:29:19.822589", + "step": 6632, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.857728", + "step": 6632, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004153220448642969, + "timestamp": "2025-10-01 03:29:19.861024", + "step": 6633, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.898508", + "step": 6633, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008387035690248013, + "timestamp": "2025-10-01 03:29:19.902136", + "step": 6634, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.937337", + "step": 6634, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045238033635541797, + "timestamp": "2025-10-01 03:29:19.940649", + "step": 6635, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:19.976113", + "step": 6635, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010848528472706676, + "timestamp": "2025-10-01 03:29:20.003447", + "step": 6636, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.044851", + "step": 6636, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01292957179248333, + "timestamp": "2025-10-01 03:29:20.048251", + "step": 6637, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:20.086684", + "step": 6637, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001000318952719681, + "timestamp": "2025-10-01 03:29:20.090166", + "step": 6638, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.129055", + "step": 6638, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024952192325145006, + "timestamp": "2025-10-01 03:29:20.133995", + "step": 6639, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.166307", + "step": 6639, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019858169835060835, + "timestamp": "2025-10-01 03:29:20.191553", + "step": 6640, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.224404", + "step": 6640, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019111254368908703, + "timestamp": "2025-10-01 03:29:20.228063", + "step": 6641, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.263705", + "step": 6641, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005494021461345255, + "timestamp": "2025-10-01 03:29:20.267747", + "step": 6642, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.304313", + "step": 6642, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0532761812210083, + "timestamp": "2025-10-01 03:29:20.307763", + "step": 6643, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.340205", + "step": 6643, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030943696037866175, + "timestamp": "2025-10-01 03:29:20.365138", + "step": 6644, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.397629", + "step": 6644, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004412596463225782, + "timestamp": "2025-10-01 03:29:20.401764", + "step": 6645, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.435582", + "step": 6645, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043327780440449715, + "timestamp": "2025-10-01 03:29:20.439367", + "step": 6646, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.478878", + "step": 6646, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000169477061717771, + "timestamp": "2025-10-01 03:29:20.482637", + "step": 6647, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:20.523598", + "step": 6647, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0070345536805689335, + "timestamp": "2025-10-01 03:29:20.550160", + "step": 6648, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.585566", + "step": 6648, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003049765480682254, + "timestamp": "2025-10-01 03:29:20.588262", + "step": 6649, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.628519", + "step": 6649, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004367529763840139, + "timestamp": "2025-10-01 03:29:20.631668", + "step": 6650, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.668724", + "step": 6650, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022865609207656235, + "timestamp": "2025-10-01 03:29:20.671584", + "step": 6651, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.702579", + "step": 6651, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008741592057049274, + "timestamp": "2025-10-01 03:29:20.726891", + "step": 6652, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.763953", + "step": 6652, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010332781675970182, + "timestamp": "2025-10-01 03:29:20.766369", + "step": 6653, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:20.807675", + "step": 6653, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014580762013792992, + "timestamp": "2025-10-01 03:29:20.810651", + "step": 6654, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.851979", + "step": 6654, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026542218984104693, + "timestamp": "2025-10-01 03:29:20.855079", + "step": 6655, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:20.888371", + "step": 6655, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026724780909717083, + "timestamp": "2025-10-01 03:29:20.912167", + "step": 6656, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:20.944197", + "step": 6656, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006486537167802453, + "timestamp": "2025-10-01 03:29:20.947093", + "step": 6657, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:20.981883", + "step": 6657, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011379897594451904, + "timestamp": "2025-10-01 03:29:20.984391", + "step": 6658, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.022672", + "step": 6658, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016786146443337202, + "timestamp": "2025-10-01 03:29:21.025390", + "step": 6659, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.060984", + "step": 6659, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013009144458919764, + "timestamp": "2025-10-01 03:29:21.084998", + "step": 6660, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.119917", + "step": 6660, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019493862055242062, + "timestamp": "2025-10-01 03:29:21.122224", + "step": 6661, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:21.161764", + "step": 6661, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023835933825466782, + "timestamp": "2025-10-01 03:29:21.166155", + "step": 6662, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.201206", + "step": 6662, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002660910540726036, + "timestamp": "2025-10-01 03:29:21.203621", + "step": 6663, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:21.241387", + "step": 6663, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012930571392644197, + "timestamp": "2025-10-01 03:29:21.265506", + "step": 6664, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.304314", + "step": 6664, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020670857338700444, + "timestamp": "2025-10-01 03:29:21.306672", + "step": 6665, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.343724", + "step": 6665, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005174266989342868, + "timestamp": "2025-10-01 03:29:21.346060", + "step": 6666, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.388480", + "step": 6666, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010044453665614128, + "timestamp": "2025-10-01 03:29:21.390770", + "step": 6667, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.427849", + "step": 6667, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007550310110673308, + "timestamp": "2025-10-01 03:29:21.452027", + "step": 6668, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.486212", + "step": 6668, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004422376397997141, + "timestamp": "2025-10-01 03:29:21.489460", + "step": 6669, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.523592", + "step": 6669, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014320365153253078, + "timestamp": "2025-10-01 03:29:21.526129", + "step": 6670, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:21.563745", + "step": 6670, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.023087528068572e-05, + "timestamp": "2025-10-01 03:29:21.566180", + "step": 6671, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.602825", + "step": 6671, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011144498363137245, + "timestamp": "2025-10-01 03:29:21.628941", + "step": 6672, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.663143", + "step": 6672, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000251805002335459, + "timestamp": "2025-10-01 03:29:21.665481", + "step": 6673, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.698919", + "step": 6673, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001290749933104962, + "timestamp": "2025-10-01 03:29:21.701463", + "step": 6674, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.734707", + "step": 6674, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004177357070147991, + "timestamp": "2025-10-01 03:29:21.737355", + "step": 6675, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:21.777136", + "step": 6675, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010225421283394098, + "timestamp": "2025-10-01 03:29:21.801779", + "step": 6676, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:21.837930", + "step": 6676, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02846197783946991, + "timestamp": "2025-10-01 03:29:21.840312", + "step": 6677, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.872873", + "step": 6677, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023280878667719662, + "timestamp": "2025-10-01 03:29:21.875728", + "step": 6678, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.918064", + "step": 6678, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016950909048318863, + "timestamp": "2025-10-01 03:29:21.920335", + "step": 6679, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:21.954555", + "step": 6679, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03408440947532654, + "timestamp": "2025-10-01 03:29:21.979617", + "step": 6680, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:22.014050", + "step": 6680, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019955032621510327, + "timestamp": "2025-10-01 03:29:22.016744", + "step": 6681, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:22.048896", + "step": 6681, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014131392526905984, + "timestamp": "2025-10-01 03:29:22.051573", + "step": 6682, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:22.083318", + "step": 6682, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010513303568586707, + "timestamp": "2025-10-01 03:29:22.086860", + "step": 6683, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:22.118386", + "step": 6683, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001874300796771422, + "timestamp": "2025-10-01 03:29:22.142771", + "step": 6684, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:22.182474", + "step": 6684, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027098815189674497, + "timestamp": "2025-10-01 03:29:22.186029", + "step": 6685, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:22.219827", + "step": 6685, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017088487220462412, + "timestamp": "2025-10-01 03:29:22.224699", + "step": 6686, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:22.256841", + "step": 6686, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012935529230162501, + "timestamp": "2025-10-01 03:29:22.260523", + "step": 6687, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:22.294508", + "step": 6687, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016191967006307095, + "timestamp": "2025-10-01 03:29:22.319117", + "step": 6688, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:29:24.827728", + "step": 6688, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2558967.463972975, + "timestamp": "2025-10-01 03:29:24.830454", + "step": 6688, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:24.862221", + "step": 6688, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013689398765563965, + "timestamp": "2025-10-01 03:29:24.864805", + "step": 6689, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:24.897355", + "step": 6689, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019632219336926937, + "timestamp": "2025-10-01 03:29:24.900330", + "step": 6690, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:24.932839", + "step": 6690, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012917770072817802, + "timestamp": "2025-10-01 03:29:24.935668", + "step": 6691, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:24.967792", + "step": 6691, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026010232977569103, + "timestamp": "2025-10-01 03:29:24.991891", + "step": 6692, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.027175", + "step": 6692, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035738167352974415, + "timestamp": "2025-10-01 03:29:25.029870", + "step": 6693, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.061024", + "step": 6693, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002411074237897992, + "timestamp": "2025-10-01 03:29:25.063606", + "step": 6694, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.095672", + "step": 6694, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004945158725604415, + "timestamp": "2025-10-01 03:29:25.098641", + "step": 6695, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:25.129636", + "step": 6695, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023583918809890747, + "timestamp": "2025-10-01 03:29:25.153841", + "step": 6696, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:25.185569", + "step": 6696, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010936768958345056, + "timestamp": "2025-10-01 03:29:25.188063", + "step": 6697, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.221495", + "step": 6697, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019152663298882544, + "timestamp": "2025-10-01 03:29:25.224328", + "step": 6698, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.258606", + "step": 6698, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001007340761134401, + "timestamp": "2025-10-01 03:29:25.261334", + "step": 6699, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.297526", + "step": 6699, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006868518539704382, + "timestamp": "2025-10-01 03:29:25.322711", + "step": 6700, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.355001", + "step": 6700, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016309458296746016, + "timestamp": "2025-10-01 03:29:25.358405", + "step": 6701, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.389844", + "step": 6701, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018860531970858574, + "timestamp": "2025-10-01 03:29:25.393215", + "step": 6702, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.428485", + "step": 6702, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042185132042504847, + "timestamp": "2025-10-01 03:29:25.431406", + "step": 6703, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.465598", + "step": 6703, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003078159410506487, + "timestamp": "2025-10-01 03:29:25.489701", + "step": 6704, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.525174", + "step": 6704, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015861500054597855, + "timestamp": "2025-10-01 03:29:25.529743", + "step": 6705, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.562012", + "step": 6705, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004372722469270229, + "timestamp": "2025-10-01 03:29:25.564812", + "step": 6706, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.595989", + "step": 6706, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014589370402973145, + "timestamp": "2025-10-01 03:29:25.598324", + "step": 6707, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.629607", + "step": 6707, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006858698558062315, + "timestamp": "2025-10-01 03:29:25.653831", + "step": 6708, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.690952", + "step": 6708, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.033762019127607346, + "timestamp": "2025-10-01 03:29:25.694217", + "step": 6709, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.726118", + "step": 6709, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.66419029282406e-05, + "timestamp": "2025-10-01 03:29:25.728430", + "step": 6710, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.764932", + "step": 6710, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019328210037201643, + "timestamp": "2025-10-01 03:29:25.767154", + "step": 6711, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:25.804816", + "step": 6711, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.477835945086554e-05, + "timestamp": "2025-10-01 03:29:25.828925", + "step": 6712, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:25.860906", + "step": 6712, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010083908215165138, + "timestamp": "2025-10-01 03:29:25.863290", + "step": 6713, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:25.897879", + "step": 6713, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019503936637192965, + "timestamp": "2025-10-01 03:29:25.900284", + "step": 6714, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:25.933788", + "step": 6714, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018383769202046096, + "timestamp": "2025-10-01 03:29:25.936448", + "step": 6715, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:25.970508", + "step": 6715, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009246842004358768, + "timestamp": "2025-10-01 03:29:25.994718", + "step": 6716, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.029750", + "step": 6716, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017801197827793658, + "timestamp": "2025-10-01 03:29:26.031994", + "step": 6717, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.068398", + "step": 6717, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003531549300532788, + "timestamp": "2025-10-01 03:29:26.070765", + "step": 6718, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.103667", + "step": 6718, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011584791354835033, + "timestamp": "2025-10-01 03:29:26.106154", + "step": 6719, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.142052", + "step": 6719, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.023018626496195793, + "timestamp": "2025-10-01 03:29:26.165960", + "step": 6720, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:26.199340", + "step": 6720, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001112554527935572, + "timestamp": "2025-10-01 03:29:26.205561", + "step": 6721, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:26.239386", + "step": 6721, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002191442617913708, + "timestamp": "2025-10-01 03:29:26.241814", + "step": 6722, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.273726", + "step": 6722, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005135667743161321, + "timestamp": "2025-10-01 03:29:26.276345", + "step": 6723, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.308230", + "step": 6723, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002819916233420372, + "timestamp": "2025-10-01 03:29:26.333581", + "step": 6724, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.364497", + "step": 6724, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014132526121102273, + "timestamp": "2025-10-01 03:29:26.366984", + "step": 6725, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.398839", + "step": 6725, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006785394507460296, + "timestamp": "2025-10-01 03:29:26.401522", + "step": 6726, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.434144", + "step": 6726, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017232044774573296, + "timestamp": "2025-10-01 03:29:26.436600", + "step": 6727, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.467599", + "step": 6727, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001476992474636063, + "timestamp": "2025-10-01 03:29:26.491455", + "step": 6728, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.523212", + "step": 6728, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021604847279377282, + "timestamp": "2025-10-01 03:29:26.527987", + "step": 6729, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.560370", + "step": 6729, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043997561442665756, + "timestamp": "2025-10-01 03:29:26.562983", + "step": 6730, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.594516", + "step": 6730, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006466064602136612, + "timestamp": "2025-10-01 03:29:26.597672", + "step": 6731, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.632925", + "step": 6731, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001260436838492751, + "timestamp": "2025-10-01 03:29:26.656839", + "step": 6732, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.688676", + "step": 6732, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021610059775412083, + "timestamp": "2025-10-01 03:29:26.691292", + "step": 6733, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.725722", + "step": 6733, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015705004625488073, + "timestamp": "2025-10-01 03:29:26.728888", + "step": 6734, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.762314", + "step": 6734, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001421489578206092, + "timestamp": "2025-10-01 03:29:26.765672", + "step": 6735, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.796915", + "step": 6735, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004887881223112345, + "timestamp": "2025-10-01 03:29:26.821797", + "step": 6736, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.862786", + "step": 6736, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002775447443127632, + "timestamp": "2025-10-01 03:29:26.865745", + "step": 6737, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.901927", + "step": 6737, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.905784488888457e-05, + "timestamp": "2025-10-01 03:29:26.904848", + "step": 6738, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:26.936615", + "step": 6738, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.346453301375732e-05, + "timestamp": "2025-10-01 03:29:26.939483", + "step": 6739, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:26.972415", + "step": 6739, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.489730342058465e-05, + "timestamp": "2025-10-01 03:29:26.998677", + "step": 6740, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.033559", + "step": 6740, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.416780474362895e-05, + "timestamp": "2025-10-01 03:29:27.037572", + "step": 6741, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.072121", + "step": 6741, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001154426354332827, + "timestamp": "2025-10-01 03:29:27.077254", + "step": 6742, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.112322", + "step": 6742, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011323402577545494, + "timestamp": "2025-10-01 03:29:27.116133", + "step": 6743, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:27.152992", + "step": 6743, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010848107922356576, + "timestamp": "2025-10-01 03:29:27.178553", + "step": 6744, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:27.224748", + "step": 6744, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020702744950540364, + "timestamp": "2025-10-01 03:29:27.228166", + "step": 6745, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.262284", + "step": 6745, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014599558897316456, + "timestamp": "2025-10-01 03:29:27.266004", + "step": 6746, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:27.302309", + "step": 6746, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024329339794348925, + "timestamp": "2025-10-01 03:29:27.305999", + "step": 6747, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:27.343676", + "step": 6747, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.8891357184620574e-05, + "timestamp": "2025-10-01 03:29:27.369771", + "step": 6748, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.403861", + "step": 6748, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016965632094070315, + "timestamp": "2025-10-01 03:29:27.407639", + "step": 6749, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.444072", + "step": 6749, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005416460335254669, + "timestamp": "2025-10-01 03:29:27.447922", + "step": 6750, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:27.482497", + "step": 6750, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001494805619586259, + "timestamp": "2025-10-01 03:29:27.486891", + "step": 6751, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.525393", + "step": 6751, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026600431650877, + "timestamp": "2025-10-01 03:29:27.555595", + "step": 6752, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.594299", + "step": 6752, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011442607501521707, + "timestamp": "2025-10-01 03:29:27.598123", + "step": 6753, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.636218", + "step": 6753, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.156316860346124e-05, + "timestamp": "2025-10-01 03:29:27.639866", + "step": 6754, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.675724", + "step": 6754, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014884094707667828, + "timestamp": "2025-10-01 03:29:27.679152", + "step": 6755, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.715204", + "step": 6755, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.5498097935924307e-05, + "timestamp": "2025-10-01 03:29:27.740769", + "step": 6756, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.775113", + "step": 6756, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.877151751425117e-05, + "timestamp": "2025-10-01 03:29:27.778863", + "step": 6757, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.813092", + "step": 6757, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032623353763483465, + "timestamp": "2025-10-01 03:29:27.816518", + "step": 6758, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.851819", + "step": 6758, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.683202031534165e-05, + "timestamp": "2025-10-01 03:29:27.855759", + "step": 6759, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:27.891038", + "step": 6759, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023491783940698951, + "timestamp": "2025-10-01 03:29:27.916813", + "step": 6760, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.951090", + "step": 6760, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016250180487986654, + "timestamp": "2025-10-01 03:29:27.954749", + "step": 6761, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:27.988846", + "step": 6761, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.3967283747624606e-05, + "timestamp": "2025-10-01 03:29:27.994126", + "step": 6762, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:28.030042", + "step": 6762, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010715965618146583, + "timestamp": "2025-10-01 03:29:28.034300", + "step": 6763, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.072943", + "step": 6763, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001290805812459439, + "timestamp": "2025-10-01 03:29:28.097633", + "step": 6764, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.133026", + "step": 6764, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.508555427193642e-05, + "timestamp": "2025-10-01 03:29:28.137407", + "step": 6765, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.173058", + "step": 6765, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008605843759141862, + "timestamp": "2025-10-01 03:29:28.179194", + "step": 6766, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.215832", + "step": 6766, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.639433169970289e-05, + "timestamp": "2025-10-01 03:29:28.219245", + "step": 6767, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.253158", + "step": 6767, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038972191978245974, + "timestamp": "2025-10-01 03:29:28.277417", + "step": 6768, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.311253", + "step": 6768, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013327348278835416, + "timestamp": "2025-10-01 03:29:28.314022", + "step": 6769, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.351311", + "step": 6769, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005552190123125911, + "timestamp": "2025-10-01 03:29:28.354894", + "step": 6770, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:28.388774", + "step": 6770, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.9374015361536294e-05, + "timestamp": "2025-10-01 03:29:28.391777", + "step": 6771, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.424366", + "step": 6771, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015769041783642024, + "timestamp": "2025-10-01 03:29:28.449276", + "step": 6772, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.482760", + "step": 6772, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037663232069462538, + "timestamp": "2025-10-01 03:29:28.485691", + "step": 6773, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.519034", + "step": 6773, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.980473987525329e-05, + "timestamp": "2025-10-01 03:29:28.522030", + "step": 6774, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.554727", + "step": 6774, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.9898029576288536e-05, + "timestamp": "2025-10-01 03:29:28.558757", + "step": 6775, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:28.597192", + "step": 6775, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018026902107521892, + "timestamp": "2025-10-01 03:29:28.622521", + "step": 6776, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.679903", + "step": 6776, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004827103402931243, + "timestamp": "2025-10-01 03:29:28.682232", + "step": 6777, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:28.718793", + "step": 6777, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002654791169334203, + "timestamp": "2025-10-01 03:29:28.721782", + "step": 6778, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.754035", + "step": 6778, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003709646116476506, + "timestamp": "2025-10-01 03:29:28.756481", + "step": 6779, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.791069", + "step": 6779, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.114903175737709e-05, + "timestamp": "2025-10-01 03:29:28.814622", + "step": 6780, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:28.856149", + "step": 6780, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004103677929379046, + "timestamp": "2025-10-01 03:29:28.858646", + "step": 6781, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:28.891794", + "step": 6781, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011191398371011019, + "timestamp": "2025-10-01 03:29:28.895862", + "step": 6782, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.933903", + "step": 6782, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0051644789054989815, + "timestamp": "2025-10-01 03:29:28.936282", + "step": 6783, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:28.976936", + "step": 6783, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.741013491293415e-05, + "timestamp": "2025-10-01 03:29:29.001492", + "step": 6784, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.049109", + "step": 6784, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020857653580605984, + "timestamp": "2025-10-01 03:29:29.052192", + "step": 6785, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.091353", + "step": 6785, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002122411533491686, + "timestamp": "2025-10-01 03:29:29.095194", + "step": 6786, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.135465", + "step": 6786, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018145480134990066, + "timestamp": "2025-10-01 03:29:29.138114", + "step": 6787, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.173710", + "step": 6787, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022667476907372475, + "timestamp": "2025-10-01 03:29:29.198345", + "step": 6788, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.236262", + "step": 6788, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.813854608684778e-05, + "timestamp": "2025-10-01 03:29:29.238975", + "step": 6789, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:29.272510", + "step": 6789, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.944633059902117e-05, + "timestamp": "2025-10-01 03:29:29.275061", + "step": 6790, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.306067", + "step": 6790, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006243619718588889, + "timestamp": "2025-10-01 03:29:29.309126", + "step": 6791, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.347537", + "step": 6791, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020880498050246388, + "timestamp": "2025-10-01 03:29:29.383842", + "step": 6792, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.416582", + "step": 6792, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000173073640326038, + "timestamp": "2025-10-01 03:29:29.419117", + "step": 6793, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:29.452450", + "step": 6793, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011196922423550859, + "timestamp": "2025-10-01 03:29:29.455071", + "step": 6794, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.490086", + "step": 6794, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020707859948743135, + "timestamp": "2025-10-01 03:29:29.492475", + "step": 6795, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.527480", + "step": 6795, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002732161956373602, + "timestamp": "2025-10-01 03:29:29.551694", + "step": 6796, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.589368", + "step": 6796, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00949435867369175, + "timestamp": "2025-10-01 03:29:29.592359", + "step": 6797, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.627689", + "step": 6797, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001029323466354981, + "timestamp": "2025-10-01 03:29:29.630369", + "step": 6798, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.663252", + "step": 6798, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040669666486792266, + "timestamp": "2025-10-01 03:29:29.665749", + "step": 6799, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.698892", + "step": 6799, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006531658000312746, + "timestamp": "2025-10-01 03:29:29.723412", + "step": 6800, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:29.761584", + "step": 6800, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00385207193903625, + "timestamp": "2025-10-01 03:29:29.764230", + "step": 6801, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.798060", + "step": 6801, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.027166467159986496, + "timestamp": "2025-10-01 03:29:29.801172", + "step": 6802, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.840535", + "step": 6802, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003768214664887637, + "timestamp": "2025-10-01 03:29:29.843879", + "step": 6803, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.880560", + "step": 6803, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.7574280466069467e-05, + "timestamp": "2025-10-01 03:29:29.904737", + "step": 6804, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.940270", + "step": 6804, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.2369931179564446e-05, + "timestamp": "2025-10-01 03:29:29.942978", + "step": 6805, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:29.976160", + "step": 6805, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025533311418257654, + "timestamp": "2025-10-01 03:29:29.978622", + "step": 6806, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.011681", + "step": 6806, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011362923309206963, + "timestamp": "2025-10-01 03:29:30.014414", + "step": 6807, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.051671", + "step": 6807, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009870672365650535, + "timestamp": "2025-10-01 03:29:30.076540", + "step": 6808, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.113864", + "step": 6808, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000244928669417277, + "timestamp": "2025-10-01 03:29:30.116812", + "step": 6809, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.152849", + "step": 6809, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.370589158497751e-05, + "timestamp": "2025-10-01 03:29:30.155759", + "step": 6810, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.191175", + "step": 6810, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004193040949758142, + "timestamp": "2025-10-01 03:29:30.194131", + "step": 6811, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.229065", + "step": 6811, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010480205528438091, + "timestamp": "2025-10-01 03:29:30.253295", + "step": 6812, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:30.298702", + "step": 6812, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010705458407755941, + "timestamp": "2025-10-01 03:29:30.301169", + "step": 6813, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:30.342859", + "step": 6813, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05261076241731644, + "timestamp": "2025-10-01 03:29:30.345457", + "step": 6814, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.378417", + "step": 6814, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005566153558902442, + "timestamp": "2025-10-01 03:29:30.382423", + "step": 6815, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:30.416558", + "step": 6815, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006184538942761719, + "timestamp": "2025-10-01 03:29:30.440355", + "step": 6816, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:30.472025", + "step": 6816, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017561025742907077, + "timestamp": "2025-10-01 03:29:30.474611", + "step": 6817, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:30.512255", + "step": 6817, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.863989306613803e-05, + "timestamp": "2025-10-01 03:29:30.515075", + "step": 6818, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.550645", + "step": 6818, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000665843952447176, + "timestamp": "2025-10-01 03:29:30.554524", + "step": 6819, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.590782", + "step": 6819, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030290716676972806, + "timestamp": "2025-10-01 03:29:30.614686", + "step": 6820, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:30.655652", + "step": 6820, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012745945423375815, + "timestamp": "2025-10-01 03:29:30.658055", + "step": 6821, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.691571", + "step": 6821, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.864318806445226e-05, + "timestamp": "2025-10-01 03:29:30.693932", + "step": 6822, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:30.727881", + "step": 6822, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012082059402018785, + "timestamp": "2025-10-01 03:29:30.730166", + "step": 6823, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:30.762099", + "step": 6823, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018515074625611305, + "timestamp": "2025-10-01 03:29:30.785942", + "step": 6824, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.818518", + "step": 6824, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.784741996554658e-05, + "timestamp": "2025-10-01 03:29:30.820713", + "step": 6825, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.854546", + "step": 6825, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0338803306221962, + "timestamp": "2025-10-01 03:29:30.856574", + "step": 6826, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:30.888245", + "step": 6826, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.634059824747965e-05, + "timestamp": "2025-10-01 03:29:30.890262", + "step": 6827, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:30.933317", + "step": 6827, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011363838711986318, + "timestamp": "2025-10-01 03:29:30.957519", + "step": 6828, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:30.989238", + "step": 6828, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016096516628749669, + "timestamp": "2025-10-01 03:29:30.992104", + "step": 6829, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.024184", + "step": 6829, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001289984938921407, + "timestamp": "2025-10-01 03:29:31.027054", + "step": 6830, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:31.058570", + "step": 6830, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019488087855279446, + "timestamp": "2025-10-01 03:29:31.061484", + "step": 6831, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.096593", + "step": 6831, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030205189250409603, + "timestamp": "2025-10-01 03:29:31.120901", + "step": 6832, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.152942", + "step": 6832, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.559525642544031e-05, + "timestamp": "2025-10-01 03:29:31.155550", + "step": 6833, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.187067", + "step": 6833, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.287275366252288e-05, + "timestamp": "2025-10-01 03:29:31.189706", + "step": 6834, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.220699", + "step": 6834, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022201576211955398, + "timestamp": "2025-10-01 03:29:31.223336", + "step": 6835, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:31.255612", + "step": 6835, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016813362890388817, + "timestamp": "2025-10-01 03:29:31.279339", + "step": 6836, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.311402", + "step": 6836, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001149176896433346, + "timestamp": "2025-10-01 03:29:31.313872", + "step": 6837, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.352059", + "step": 6837, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012704871187452227, + "timestamp": "2025-10-01 03:29:31.354481", + "step": 6838, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.387481", + "step": 6838, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014323025941848755, + "timestamp": "2025-10-01 03:29:31.389669", + "step": 6839, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:31.429271", + "step": 6839, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024460002896375954, + "timestamp": "2025-10-01 03:29:31.453016", + "step": 6840, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:29:34.431558", + "step": 6840, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2649040.550021584, + "timestamp": "2025-10-01 03:29:34.434098", + "step": 6840, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.463708", + "step": 6840, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009928497020155191, + "timestamp": "2025-10-01 03:29:34.466690", + "step": 6841, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.499553", + "step": 6841, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019738684932235628, + "timestamp": "2025-10-01 03:29:34.502277", + "step": 6842, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.534770", + "step": 6842, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015159107744693756, + "timestamp": "2025-10-01 03:29:34.537859", + "step": 6843, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.579684", + "step": 6843, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019019305182155222, + "timestamp": "2025-10-01 03:29:34.605089", + "step": 6844, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.637655", + "step": 6844, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003832273941952735, + "timestamp": "2025-10-01 03:29:34.640852", + "step": 6845, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.672625", + "step": 6845, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021504121832549572, + "timestamp": "2025-10-01 03:29:34.676012", + "step": 6846, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.707973", + "step": 6846, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015296313213184476, + "timestamp": "2025-10-01 03:29:34.710510", + "step": 6847, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.746507", + "step": 6847, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001636145607335493, + "timestamp": "2025-10-01 03:29:34.771025", + "step": 6848, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:34.805517", + "step": 6848, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022266815358307213, + "timestamp": "2025-10-01 03:29:34.808370", + "step": 6849, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.844255", + "step": 6849, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006823346018791199, + "timestamp": "2025-10-01 03:29:34.847513", + "step": 6850, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.880858", + "step": 6850, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014790561981499195, + "timestamp": "2025-10-01 03:29:34.885172", + "step": 6851, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:34.920396", + "step": 6851, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.988599099917337e-05, + "timestamp": "2025-10-01 03:29:34.946092", + "step": 6852, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:34.986021", + "step": 6852, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002806345873977989, + "timestamp": "2025-10-01 03:29:34.989110", + "step": 6853, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:35.025334", + "step": 6853, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019311534240841866, + "timestamp": "2025-10-01 03:29:35.028161", + "step": 6854, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.060854", + "step": 6854, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004530876874923706, + "timestamp": "2025-10-01 03:29:35.063955", + "step": 6855, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.095599", + "step": 6855, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017315533477813005, + "timestamp": "2025-10-01 03:29:35.119877", + "step": 6856, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.155400", + "step": 6856, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00496257096529007, + "timestamp": "2025-10-01 03:29:35.158560", + "step": 6857, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.191187", + "step": 6857, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.07156895101070404, + "timestamp": "2025-10-01 03:29:35.194094", + "step": 6858, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.226686", + "step": 6858, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045966581092216074, + "timestamp": "2025-10-01 03:29:35.229625", + "step": 6859, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.265246", + "step": 6859, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005113166989758611, + "timestamp": "2025-10-01 03:29:35.289559", + "step": 6860, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.323544", + "step": 6860, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017517141532152891, + "timestamp": "2025-10-01 03:29:35.326465", + "step": 6861, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.360378", + "step": 6861, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002519177214708179, + "timestamp": "2025-10-01 03:29:35.363215", + "step": 6862, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:35.397560", + "step": 6862, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001055136090144515, + "timestamp": "2025-10-01 03:29:35.400747", + "step": 6863, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.437244", + "step": 6863, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017499139357823879, + "timestamp": "2025-10-01 03:29:35.461831", + "step": 6864, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.495667", + "step": 6864, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.553731797495857e-05, + "timestamp": "2025-10-01 03:29:35.498414", + "step": 6865, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.533350", + "step": 6865, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017537757521495223, + "timestamp": "2025-10-01 03:29:35.536480", + "step": 6866, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.571856", + "step": 6866, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.667554746149108e-05, + "timestamp": "2025-10-01 03:29:35.574984", + "step": 6867, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.610941", + "step": 6867, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0065177674405276775, + "timestamp": "2025-10-01 03:29:35.636529", + "step": 6868, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.678353", + "step": 6868, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010019405744969845, + "timestamp": "2025-10-01 03:29:35.680856", + "step": 6869, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:35.717242", + "step": 6869, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.358082752441987e-05, + "timestamp": "2025-10-01 03:29:35.720029", + "step": 6870, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.752026", + "step": 6870, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015275951591320336, + "timestamp": "2025-10-01 03:29:35.754514", + "step": 6871, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.789172", + "step": 6871, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042609692900441587, + "timestamp": "2025-10-01 03:29:35.813254", + "step": 6872, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.848216", + "step": 6872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010636266088113189, + "timestamp": "2025-10-01 03:29:35.850629", + "step": 6873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:35.883629", + "step": 6873, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014280052855610847, + "timestamp": "2025-10-01 03:29:35.885994", + "step": 6874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.925383", + "step": 6874, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002481040428392589, + "timestamp": "2025-10-01 03:29:35.927913", + "step": 6875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:35.961679", + "step": 6875, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035610952181741595, + "timestamp": "2025-10-01 03:29:35.986272", + "step": 6876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:36.028531", + "step": 6876, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006292124278843403, + "timestamp": "2025-10-01 03:29:36.031766", + "step": 6877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.065935", + "step": 6877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011458083055913448, + "timestamp": "2025-10-01 03:29:36.068784", + "step": 6878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:36.101526", + "step": 6878, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002498367801308632, + "timestamp": "2025-10-01 03:29:36.106054", + "step": 6879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.138748", + "step": 6879, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.2999086619820446e-05, + "timestamp": "2025-10-01 03:29:36.163910", + "step": 6880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.198216", + "step": 6880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003023883909918368, + "timestamp": "2025-10-01 03:29:36.202107", + "step": 6881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:36.235610", + "step": 6881, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001152667566202581, + "timestamp": "2025-10-01 03:29:36.241585", + "step": 6882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:36.275570", + "step": 6882, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040129743865691125, + "timestamp": "2025-10-01 03:29:36.279231", + "step": 6883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.311972", + "step": 6883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03285376355051994, + "timestamp": "2025-10-01 03:29:36.336708", + "step": 6884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:36.373182", + "step": 6884, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003934765700250864, + "timestamp": "2025-10-01 03:29:36.377759", + "step": 6885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:36.416731", + "step": 6885, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005483345012180507, + "timestamp": "2025-10-01 03:29:36.423254", + "step": 6886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.459671", + "step": 6886, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.775738388067111e-05, + "timestamp": "2025-10-01 03:29:36.463505", + "step": 6887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.496123", + "step": 6887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015784821880515665, + "timestamp": "2025-10-01 03:29:36.522006", + "step": 6888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.558347", + "step": 6888, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045442869304679334, + "timestamp": "2025-10-01 03:29:36.561798", + "step": 6889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.595801", + "step": 6889, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006742123514413834, + "timestamp": "2025-10-01 03:29:36.599178", + "step": 6890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.631948", + "step": 6890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016154389595612884, + "timestamp": "2025-10-01 03:29:36.636352", + "step": 6891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.668598", + "step": 6891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036192729021422565, + "timestamp": "2025-10-01 03:29:36.693558", + "step": 6892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.725569", + "step": 6892, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007967590354382992, + "timestamp": "2025-10-01 03:29:36.729057", + "step": 6893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.762310", + "step": 6893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036479972768574953, + "timestamp": "2025-10-01 03:29:36.766419", + "step": 6894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.800061", + "step": 6894, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026293215341866016, + "timestamp": "2025-10-01 03:29:36.802620", + "step": 6895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.836533", + "step": 6895, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011706281657097861, + "timestamp": "2025-10-01 03:29:36.863155", + "step": 6896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.895349", + "step": 6896, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048170334775932133, + "timestamp": "2025-10-01 03:29:36.898811", + "step": 6897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.930837", + "step": 6897, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.862843681825325e-05, + "timestamp": "2025-10-01 03:29:36.934782", + "step": 6898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:36.966786", + "step": 6898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007946689729578793, + "timestamp": "2025-10-01 03:29:36.970463", + "step": 6899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.004076", + "step": 6899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001684410235611722, + "timestamp": "2025-10-01 03:29:37.028811", + "step": 6900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.061597", + "step": 6900, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013425848737824708, + "timestamp": "2025-10-01 03:29:37.065908", + "step": 6901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.102571", + "step": 6901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012513533874880522, + "timestamp": "2025-10-01 03:29:37.106363", + "step": 6902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.144045", + "step": 6902, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004005507566034794, + "timestamp": "2025-10-01 03:29:37.148002", + "step": 6903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:37.182084", + "step": 6903, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016318753478117287, + "timestamp": "2025-10-01 03:29:37.207421", + "step": 6904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.240143", + "step": 6904, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004366436507552862, + "timestamp": "2025-10-01 03:29:37.243861", + "step": 6905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.276152", + "step": 6905, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016947457566857338, + "timestamp": "2025-10-01 03:29:37.279695", + "step": 6906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.313196", + "step": 6906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020260455494280905, + "timestamp": "2025-10-01 03:29:37.316648", + "step": 6907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.350395", + "step": 6907, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020229455549269915, + "timestamp": "2025-10-01 03:29:37.377532", + "step": 6908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.417011", + "step": 6908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002141729200957343, + "timestamp": "2025-10-01 03:29:37.421386", + "step": 6909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:37.454535", + "step": 6909, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014885931159369648, + "timestamp": "2025-10-01 03:29:37.459932", + "step": 6910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.493027", + "step": 6910, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022191123571246862, + "timestamp": "2025-10-01 03:29:37.496960", + "step": 6911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.529151", + "step": 6911, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000234634819207713, + "timestamp": "2025-10-01 03:29:37.554729", + "step": 6912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:37.588250", + "step": 6912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018631726561579853, + "timestamp": "2025-10-01 03:29:37.593346", + "step": 6913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.628309", + "step": 6913, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003736684157047421, + "timestamp": "2025-10-01 03:29:37.633915", + "step": 6914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.668001", + "step": 6914, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022863238118588924, + "timestamp": "2025-10-01 03:29:37.672315", + "step": 6915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:37.709441", + "step": 6915, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.33515329961665e-05, + "timestamp": "2025-10-01 03:29:37.733850", + "step": 6916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.767877", + "step": 6916, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033365972340106964, + "timestamp": "2025-10-01 03:29:37.770781", + "step": 6917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.803417", + "step": 6917, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004932517651468515, + "timestamp": "2025-10-01 03:29:37.807760", + "step": 6918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.841925", + "step": 6918, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001122703542932868, + "timestamp": "2025-10-01 03:29:37.845634", + "step": 6919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.878160", + "step": 6919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014820605283603072, + "timestamp": "2025-10-01 03:29:37.903210", + "step": 6920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.933782", + "step": 6920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004946459084749222, + "timestamp": "2025-10-01 03:29:37.936984", + "step": 6921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:37.969540", + "step": 6921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009551666444167495, + "timestamp": "2025-10-01 03:29:37.971985", + "step": 6922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.003508", + "step": 6922, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007016176241450012, + "timestamp": "2025-10-01 03:29:38.006144", + "step": 6923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.036940", + "step": 6923, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002781898947432637, + "timestamp": "2025-10-01 03:29:38.060854", + "step": 6924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:38.094823", + "step": 6924, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005341754877008498, + "timestamp": "2025-10-01 03:29:38.097174", + "step": 6925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:38.129921", + "step": 6925, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022213034390006214, + "timestamp": "2025-10-01 03:29:38.132581", + "step": 6926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:38.170162", + "step": 6926, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003484654298517853, + "timestamp": "2025-10-01 03:29:38.172814", + "step": 6927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.203327", + "step": 6927, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.074142998433672e-05, + "timestamp": "2025-10-01 03:29:38.227396", + "step": 6928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:38.262970", + "step": 6928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007521635270677507, + "timestamp": "2025-10-01 03:29:38.265250", + "step": 6929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.299531", + "step": 6929, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018318708753213286, + "timestamp": "2025-10-01 03:29:38.302054", + "step": 6930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:38.333005", + "step": 6930, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002133493107976392, + "timestamp": "2025-10-01 03:29:38.335814", + "step": 6931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.367020", + "step": 6931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015078020805958658, + "timestamp": "2025-10-01 03:29:38.392401", + "step": 6932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:38.423937", + "step": 6932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032857729820534587, + "timestamp": "2025-10-01 03:29:38.426846", + "step": 6933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.461648", + "step": 6933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004519038484431803, + "timestamp": "2025-10-01 03:29:38.464168", + "step": 6934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:38.501260", + "step": 6934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003530733520165086, + "timestamp": "2025-10-01 03:29:38.503918", + "step": 6935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:38.535393", + "step": 6935, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011026179417967796, + "timestamp": "2025-10-01 03:29:38.559653", + "step": 6936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.590867", + "step": 6936, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002681472571566701, + "timestamp": "2025-10-01 03:29:38.593930", + "step": 6937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.628074", + "step": 6937, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00758437579497695, + "timestamp": "2025-10-01 03:29:38.630617", + "step": 6938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.662725", + "step": 6938, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034243889967910945, + "timestamp": "2025-10-01 03:29:38.665426", + "step": 6939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.698224", + "step": 6939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012928519863635302, + "timestamp": "2025-10-01 03:29:38.723478", + "step": 6940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.757324", + "step": 6940, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.3938951168674976e-05, + "timestamp": "2025-10-01 03:29:38.760203", + "step": 6941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:38.792751", + "step": 6941, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018575992726255208, + "timestamp": "2025-10-01 03:29:38.795441", + "step": 6942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.826448", + "step": 6942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013994546316098422, + "timestamp": "2025-10-01 03:29:38.829143", + "step": 6943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.862951", + "step": 6943, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005307411192916334, + "timestamp": "2025-10-01 03:29:38.887187", + "step": 6944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.918212", + "step": 6944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010120437946170568, + "timestamp": "2025-10-01 03:29:38.920898", + "step": 6945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:38.954644", + "step": 6945, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011284386273473501, + "timestamp": "2025-10-01 03:29:38.957144", + "step": 6946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:38.988700", + "step": 6946, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.516191933769733e-05, + "timestamp": "2025-10-01 03:29:38.991923", + "step": 6947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.032133", + "step": 6947, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001596592366695404, + "timestamp": "2025-10-01 03:29:39.056268", + "step": 6948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.086932", + "step": 6948, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010228718165308237, + "timestamp": "2025-10-01 03:29:39.089324", + "step": 6949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.119965", + "step": 6949, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.088472996954806e-05, + "timestamp": "2025-10-01 03:29:39.122874", + "step": 6950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.154400", + "step": 6950, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.3877680582227185e-05, + "timestamp": "2025-10-01 03:29:39.157149", + "step": 6951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:39.188995", + "step": 6951, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00963359884917736, + "timestamp": "2025-10-01 03:29:39.212932", + "step": 6952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.243993", + "step": 6952, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031175417825579643, + "timestamp": "2025-10-01 03:29:39.246758", + "step": 6953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.277604", + "step": 6953, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013570699957199395, + "timestamp": "2025-10-01 03:29:39.281647", + "step": 6954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.313300", + "step": 6954, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034820653963834047, + "timestamp": "2025-10-01 03:29:39.316316", + "step": 6955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.348419", + "step": 6955, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005238278536126018, + "timestamp": "2025-10-01 03:29:39.372522", + "step": 6956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:39.403731", + "step": 6956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010368212824687362, + "timestamp": "2025-10-01 03:29:39.406678", + "step": 6957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.437902", + "step": 6957, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.758751937421039e-05, + "timestamp": "2025-10-01 03:29:39.440601", + "step": 6958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.471412", + "step": 6958, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016651605255901814, + "timestamp": "2025-10-01 03:29:39.474147", + "step": 6959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:39.506983", + "step": 6959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035631097853183746, + "timestamp": "2025-10-01 03:29:39.531119", + "step": 6960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:39.563462", + "step": 6960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017985861049965024, + "timestamp": "2025-10-01 03:29:39.566229", + "step": 6961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.597731", + "step": 6961, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022524951491504908, + "timestamp": "2025-10-01 03:29:39.600298", + "step": 6962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.631537", + "step": 6962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002445026533678174, + "timestamp": "2025-10-01 03:29:39.634852", + "step": 6963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.666326", + "step": 6963, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004948217538185418, + "timestamp": "2025-10-01 03:29:39.690939", + "step": 6964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:39.723460", + "step": 6964, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021347691654227674, + "timestamp": "2025-10-01 03:29:39.727827", + "step": 6965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:39.758201", + "step": 6965, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003688315919134766, + "timestamp": "2025-10-01 03:29:39.760582", + "step": 6966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.790852", + "step": 6966, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.134985647397116e-05, + "timestamp": "2025-10-01 03:29:39.793139", + "step": 6967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.823097", + "step": 6967, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001967022428289056, + "timestamp": "2025-10-01 03:29:39.847146", + "step": 6968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:39.877896", + "step": 6968, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001224062725668773, + "timestamp": "2025-10-01 03:29:39.882070", + "step": 6969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.928754", + "step": 6969, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014344939962029457, + "timestamp": "2025-10-01 03:29:39.932003", + "step": 6970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:39.963249", + "step": 6970, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027357458020560443, + "timestamp": "2025-10-01 03:29:39.977186", + "step": 6971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.009256", + "step": 6971, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027150960522703826, + "timestamp": "2025-10-01 03:29:40.036154", + "step": 6972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.072890", + "step": 6972, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06169750168919563, + "timestamp": "2025-10-01 03:29:40.075538", + "step": 6973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.107079", + "step": 6973, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.858819688204676e-05, + "timestamp": "2025-10-01 03:29:40.109083", + "step": 6974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:40.144237", + "step": 6974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012573903950396925, + "timestamp": "2025-10-01 03:29:40.149897", + "step": 6975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:40.180409", + "step": 6975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013172996114008129, + "timestamp": "2025-10-01 03:29:40.204387", + "step": 6976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.235212", + "step": 6976, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000844119640532881, + "timestamp": "2025-10-01 03:29:40.237686", + "step": 6977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.272040", + "step": 6977, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013776998093817383, + "timestamp": "2025-10-01 03:29:40.274644", + "step": 6978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.304950", + "step": 6978, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.4361203587614e-05, + "timestamp": "2025-10-01 03:29:40.307419", + "step": 6979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:40.348495", + "step": 6979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019690915942192078, + "timestamp": "2025-10-01 03:29:40.372452", + "step": 6980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.406236", + "step": 6980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003890289517585188, + "timestamp": "2025-10-01 03:29:40.409027", + "step": 6981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.439952", + "step": 6981, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021822303824592382, + "timestamp": "2025-10-01 03:29:40.442038", + "step": 6982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.472053", + "step": 6982, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000294988916721195, + "timestamp": "2025-10-01 03:29:40.474306", + "step": 6983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.504737", + "step": 6983, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004558925982564688, + "timestamp": "2025-10-01 03:29:40.528927", + "step": 6984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:40.559291", + "step": 6984, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004054978198837489, + "timestamp": "2025-10-01 03:29:40.561829", + "step": 6985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.592628", + "step": 6985, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000469454622361809, + "timestamp": "2025-10-01 03:29:40.594927", + "step": 6986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.627582", + "step": 6986, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008418933139182627, + "timestamp": "2025-10-01 03:29:40.629916", + "step": 6987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.662950", + "step": 6987, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002781198127195239, + "timestamp": "2025-10-01 03:29:40.686638", + "step": 6988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.717862", + "step": 6988, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022518604237120599, + "timestamp": "2025-10-01 03:29:40.720342", + "step": 6989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:40.750832", + "step": 6989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007695471285842359, + "timestamp": "2025-10-01 03:29:40.753092", + "step": 6990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.784860", + "step": 6990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002196666318923235, + "timestamp": "2025-10-01 03:29:40.787170", + "step": 6991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:40.819883", + "step": 6991, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03470819443464279, + "timestamp": "2025-10-01 03:29:40.843730", + "step": 6992, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:29:43.074916", + "step": 6992, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2631976.44441265, + "timestamp": "2025-10-01 03:29:43.082276", + "step": 6992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:43.113021", + "step": 6992, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018956183921545744, + "timestamp": "2025-10-01 03:29:43.116143", + "step": 6993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:43.147161", + "step": 6993, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.330501830438152e-05, + "timestamp": "2025-10-01 03:29:43.150258", + "step": 6994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:43.182595", + "step": 6994, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003373210085555911, + "timestamp": "2025-10-01 03:29:43.186527", + "step": 6995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:43.227836", + "step": 6995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001177705591544509, + "timestamp": "2025-10-01 03:29:43.253560", + "step": 6996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:43.284518", + "step": 6996, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003467377391643822, + "timestamp": "2025-10-01 03:29:43.288631", + "step": 6997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:43.322216", + "step": 6997, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.082001684466377e-05, + "timestamp": "2025-10-01 03:29:43.324796", + "step": 6998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:43.357331", + "step": 6998, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.32612796430476e-05, + "timestamp": "2025-10-01 03:29:43.360324", + "step": 6999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:43.392479", + "step": 6999, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.892355253919959e-05, + "timestamp": "2025-10-01 03:29:43.416983", + "step": 7000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 7000", + "timestamp": "2025-10-01 03:29:48.862071", + "step": 7000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:48.902243", + "step": 7000, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022900833282619715, + "timestamp": "2025-10-01 03:29:48.906083", + "step": 7001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:48.939855", + "step": 7001, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011277960584266111, + "timestamp": "2025-10-01 03:29:48.942851", + "step": 7002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:48.975200", + "step": 7002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022202204854693264, + "timestamp": "2025-10-01 03:29:48.977649", + "step": 7003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:49.015880", + "step": 7003, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.223045471007936e-05, + "timestamp": "2025-10-01 03:29:49.039848", + "step": 7004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.072848", + "step": 7004, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013349867425858974, + "timestamp": "2025-10-01 03:29:49.075162", + "step": 7005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:49.112199", + "step": 7005, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036368504515849054, + "timestamp": "2025-10-01 03:29:49.114996", + "step": 7006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.147861", + "step": 7006, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005628376267850399, + "timestamp": "2025-10-01 03:29:49.150910", + "step": 7007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:49.191960", + "step": 7007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036367602297104895, + "timestamp": "2025-10-01 03:29:49.228499", + "step": 7008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.261177", + "step": 7008, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001158481536549516, + "timestamp": "2025-10-01 03:29:49.264843", + "step": 7009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.298698", + "step": 7009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001170589093817398, + "timestamp": "2025-10-01 03:29:49.301046", + "step": 7010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.337828", + "step": 7010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010614419443299994, + "timestamp": "2025-10-01 03:29:49.340549", + "step": 7011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.373051", + "step": 7011, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.234517347067595e-05, + "timestamp": "2025-10-01 03:29:49.397340", + "step": 7012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.435675", + "step": 7012, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016851995314937085, + "timestamp": "2025-10-01 03:29:49.438344", + "step": 7013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.470088", + "step": 7013, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010671177878975868, + "timestamp": "2025-10-01 03:29:49.472925", + "step": 7014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:49.507286", + "step": 7014, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005573671660386026, + "timestamp": "2025-10-01 03:29:49.509932", + "step": 7015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.541916", + "step": 7015, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005251839174889028, + "timestamp": "2025-10-01 03:29:49.565852", + "step": 7016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.598066", + "step": 7016, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010468171967659146, + "timestamp": "2025-10-01 03:29:49.601079", + "step": 7017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.641041", + "step": 7017, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012347009032964706, + "timestamp": "2025-10-01 03:29:49.643864", + "step": 7018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.675779", + "step": 7018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021352695301175117, + "timestamp": "2025-10-01 03:29:49.678277", + "step": 7019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.710941", + "step": 7019, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005948882317170501, + "timestamp": "2025-10-01 03:29:49.736225", + "step": 7020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.769408", + "step": 7020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009115102584473789, + "timestamp": "2025-10-01 03:29:49.772051", + "step": 7021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.807586", + "step": 7021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015291070565581322, + "timestamp": "2025-10-01 03:29:49.810387", + "step": 7022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:49.848957", + "step": 7022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00440233014523983, + "timestamp": "2025-10-01 03:29:49.851678", + "step": 7023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.887670", + "step": 7023, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014875188935548067, + "timestamp": "2025-10-01 03:29:49.911871", + "step": 7024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.944518", + "step": 7024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007786078494973481, + "timestamp": "2025-10-01 03:29:49.946904", + "step": 7025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:49.988343", + "step": 7025, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024091590603347868, + "timestamp": "2025-10-01 03:29:49.991148", + "step": 7026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.024364", + "step": 7026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001814244023989886, + "timestamp": "2025-10-01 03:29:50.026882", + "step": 7027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.060342", + "step": 7027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010536456480622292, + "timestamp": "2025-10-01 03:29:50.085285", + "step": 7028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.116761", + "step": 7028, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001304657431319356, + "timestamp": "2025-10-01 03:29:50.120333", + "step": 7029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.155741", + "step": 7029, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002953308285214007, + "timestamp": "2025-10-01 03:29:50.158118", + "step": 7030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.190851", + "step": 7030, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015517119027208537, + "timestamp": "2025-10-01 03:29:50.194041", + "step": 7031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.226561", + "step": 7031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011269978858763352, + "timestamp": "2025-10-01 03:29:50.250408", + "step": 7032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.285611", + "step": 7032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011207947682123631, + "timestamp": "2025-10-01 03:29:50.295831", + "step": 7033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.330748", + "step": 7033, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013761112932115793, + "timestamp": "2025-10-01 03:29:50.333621", + "step": 7034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.367990", + "step": 7034, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.765494905877858e-05, + "timestamp": "2025-10-01 03:29:50.371121", + "step": 7035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.405073", + "step": 7035, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.3425865694880486e-05, + "timestamp": "2025-10-01 03:29:50.429432", + "step": 7036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.463672", + "step": 7036, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021005862799938768, + "timestamp": "2025-10-01 03:29:50.466804", + "step": 7037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.502568", + "step": 7037, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.07827707380056381, + "timestamp": "2025-10-01 03:29:50.505316", + "step": 7038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.539474", + "step": 7038, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.957206328981556e-05, + "timestamp": "2025-10-01 03:29:50.542740", + "step": 7039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.576436", + "step": 7039, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005859248340129852, + "timestamp": "2025-10-01 03:29:50.602090", + "step": 7040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.641743", + "step": 7040, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013434501306619495, + "timestamp": "2025-10-01 03:29:50.644527", + "step": 7041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.676541", + "step": 7041, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016918400069698691, + "timestamp": "2025-10-01 03:29:50.678920", + "step": 7042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.713053", + "step": 7042, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010644685244187713, + "timestamp": "2025-10-01 03:29:50.715772", + "step": 7043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.748398", + "step": 7043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022267807798925787, + "timestamp": "2025-10-01 03:29:50.773870", + "step": 7044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.808670", + "step": 7044, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016378064174205065, + "timestamp": "2025-10-01 03:29:50.811861", + "step": 7045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:50.847651", + "step": 7045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010443030623719096, + "timestamp": "2025-10-01 03:29:50.851236", + "step": 7046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.886785", + "step": 7046, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018151944503188133, + "timestamp": "2025-10-01 03:29:50.889227", + "step": 7047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:50.923071", + "step": 7047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011716281063854694, + "timestamp": "2025-10-01 03:29:50.947516", + "step": 7048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:50.981721", + "step": 7048, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.377991955261678e-05, + "timestamp": "2025-10-01 03:29:50.984103", + "step": 7049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.016710", + "step": 7049, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.795908535015769e-05, + "timestamp": "2025-10-01 03:29:51.019219", + "step": 7050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.059622", + "step": 7050, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000359740894054994, + "timestamp": "2025-10-01 03:29:51.062163", + "step": 7051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.097150", + "step": 7051, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.860436405986547e-05, + "timestamp": "2025-10-01 03:29:51.121140", + "step": 7052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.154597", + "step": 7052, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.612315675942227e-05, + "timestamp": "2025-10-01 03:29:51.157161", + "step": 7053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.195966", + "step": 7053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05971092730760574, + "timestamp": "2025-10-01 03:29:51.198435", + "step": 7054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:51.230635", + "step": 7054, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.818773883627728e-05, + "timestamp": "2025-10-01 03:29:51.235358", + "step": 7055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.269964", + "step": 7055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014083095826208591, + "timestamp": "2025-10-01 03:29:51.293913", + "step": 7056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.326207", + "step": 7056, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.154291233746335e-05, + "timestamp": "2025-10-01 03:29:51.328528", + "step": 7057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.372568", + "step": 7057, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.044677088037133e-05, + "timestamp": "2025-10-01 03:29:51.375395", + "step": 7058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:51.408478", + "step": 7058, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.580555054824799e-05, + "timestamp": "2025-10-01 03:29:51.410969", + "step": 7059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.448175", + "step": 7059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008100094273686409, + "timestamp": "2025-10-01 03:29:51.472244", + "step": 7060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.505414", + "step": 7060, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015286811685655266, + "timestamp": "2025-10-01 03:29:51.507931", + "step": 7061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.541208", + "step": 7061, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.894723967183381e-05, + "timestamp": "2025-10-01 03:29:51.544331", + "step": 7062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.580732", + "step": 7062, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031731047201901674, + "timestamp": "2025-10-01 03:29:51.582941", + "step": 7063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.623009", + "step": 7063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006157210445962846, + "timestamp": "2025-10-01 03:29:51.647089", + "step": 7064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.684503", + "step": 7064, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017897052748594433, + "timestamp": "2025-10-01 03:29:51.687081", + "step": 7065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.719972", + "step": 7065, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008231214247643948, + "timestamp": "2025-10-01 03:29:51.722491", + "step": 7066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.762616", + "step": 7066, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.359326941194013e-05, + "timestamp": "2025-10-01 03:29:51.765046", + "step": 7067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:51.797708", + "step": 7067, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038907499401830137, + "timestamp": "2025-10-01 03:29:51.821712", + "step": 7068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:51.853799", + "step": 7068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001169691444374621, + "timestamp": "2025-10-01 03:29:51.856773", + "step": 7069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:51.890866", + "step": 7069, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.506303957896307e-05, + "timestamp": "2025-10-01 03:29:51.893154", + "step": 7070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.926445", + "step": 7070, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.918440340086818e-05, + "timestamp": "2025-10-01 03:29:51.929804", + "step": 7071, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:51.962508", + "step": 7071, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.124885865370743e-05, + "timestamp": "2025-10-01 03:29:51.986658", + "step": 7072, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.026560", + "step": 7072, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005615045665763319, + "timestamp": "2025-10-01 03:29:52.029215", + "step": 7073, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.063794", + "step": 7073, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002882035623770207, + "timestamp": "2025-10-01 03:29:52.066511", + "step": 7074, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.105385", + "step": 7074, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.351821113843471e-05, + "timestamp": "2025-10-01 03:29:52.108081", + "step": 7075, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:52.142850", + "step": 7075, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002307630202267319, + "timestamp": "2025-10-01 03:29:52.168496", + "step": 7076, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.200404", + "step": 7076, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019019606988877058, + "timestamp": "2025-10-01 03:29:52.203060", + "step": 7077, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.236087", + "step": 7077, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004117968957871199, + "timestamp": "2025-10-01 03:29:52.239140", + "step": 7078, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.273029", + "step": 7078, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016878369206096977, + "timestamp": "2025-10-01 03:29:52.275774", + "step": 7079, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:52.310107", + "step": 7079, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035227257758378983, + "timestamp": "2025-10-01 03:29:52.334653", + "step": 7080, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:52.370405", + "step": 7080, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.279023364186287e-05, + "timestamp": "2025-10-01 03:29:52.372906", + "step": 7081, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.407919", + "step": 7081, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.132506627589464e-05, + "timestamp": "2025-10-01 03:29:52.410877", + "step": 7082, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.445565", + "step": 7082, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.9554106731666252e-05, + "timestamp": "2025-10-01 03:29:52.449657", + "step": 7083, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.489055", + "step": 7083, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001399298053001985, + "timestamp": "2025-10-01 03:29:52.514884", + "step": 7084, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.552305", + "step": 7084, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010519715578993782, + "timestamp": "2025-10-01 03:29:52.555109", + "step": 7085, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.592570", + "step": 7085, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003199163475073874, + "timestamp": "2025-10-01 03:29:52.595002", + "step": 7086, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.628411", + "step": 7086, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013353377289604396, + "timestamp": "2025-10-01 03:29:52.631060", + "step": 7087, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.666347", + "step": 7087, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020407172851264477, + "timestamp": "2025-10-01 03:29:52.690549", + "step": 7088, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:52.722683", + "step": 7088, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009206347749568522, + "timestamp": "2025-10-01 03:29:52.725691", + "step": 7089, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.758468", + "step": 7089, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006415181560441852, + "timestamp": "2025-10-01 03:29:52.761184", + "step": 7090, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:52.794726", + "step": 7090, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001487654633820057, + "timestamp": "2025-10-01 03:29:52.797671", + "step": 7091, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:52.833171", + "step": 7091, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.950854003662243e-05, + "timestamp": "2025-10-01 03:29:52.857242", + "step": 7092, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.898541", + "step": 7092, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.524944132659584e-05, + "timestamp": "2025-10-01 03:29:52.901285", + "step": 7093, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.934812", + "step": 7093, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.608720337273553e-05, + "timestamp": "2025-10-01 03:29:52.945443", + "step": 7094, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:52.979277", + "step": 7094, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004454675072338432, + "timestamp": "2025-10-01 03:29:52.982809", + "step": 7095, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.018357", + "step": 7095, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.1584506107028574e-05, + "timestamp": "2025-10-01 03:29:53.042590", + "step": 7096, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.075227", + "step": 7096, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028717159875668585, + "timestamp": "2025-10-01 03:29:53.078003", + "step": 7097, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.110745", + "step": 7097, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035659242421388626, + "timestamp": "2025-10-01 03:29:53.113811", + "step": 7098, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.149597", + "step": 7098, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.8194770493428223e-05, + "timestamp": "2025-10-01 03:29:53.152444", + "step": 7099, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.187691", + "step": 7099, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.644353561569005e-05, + "timestamp": "2025-10-01 03:29:53.213559", + "step": 7100, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.246794", + "step": 7100, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.9993072025245056e-05, + "timestamp": "2025-10-01 03:29:53.249316", + "step": 7101, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.286867", + "step": 7101, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022631140891462564, + "timestamp": "2025-10-01 03:29:53.291073", + "step": 7102, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.336827", + "step": 7102, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.348970343125984e-05, + "timestamp": "2025-10-01 03:29:53.339527", + "step": 7103, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.373675", + "step": 7103, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008259308524429798, + "timestamp": "2025-10-01 03:29:53.398126", + "step": 7104, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.433802", + "step": 7104, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008965112268924713, + "timestamp": "2025-10-01 03:29:53.438488", + "step": 7105, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.473744", + "step": 7105, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00056765868794173, + "timestamp": "2025-10-01 03:29:53.476247", + "step": 7106, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.509941", + "step": 7106, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041829925612546504, + "timestamp": "2025-10-01 03:29:53.512279", + "step": 7107, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.552656", + "step": 7107, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004631758201867342, + "timestamp": "2025-10-01 03:29:53.576783", + "step": 7108, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.619295", + "step": 7108, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.94956722529605e-05, + "timestamp": "2025-10-01 03:29:53.621853", + "step": 7109, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.656911", + "step": 7109, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.9492903195205145e-05, + "timestamp": "2025-10-01 03:29:53.659334", + "step": 7110, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.701551", + "step": 7110, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.012457379256375e-05, + "timestamp": "2025-10-01 03:29:53.704156", + "step": 7111, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.741415", + "step": 7111, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.306690870085731e-05, + "timestamp": "2025-10-01 03:29:53.765187", + "step": 7112, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.809177", + "step": 7112, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.94615038507618e-05, + "timestamp": "2025-10-01 03:29:53.811615", + "step": 7113, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.847389", + "step": 7113, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001551328314235434, + "timestamp": "2025-10-01 03:29:53.850646", + "step": 7114, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.895611", + "step": 7114, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004017105791717768, + "timestamp": "2025-10-01 03:29:53.899233", + "step": 7115, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:53.934925", + "step": 7115, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.5110682549420744e-05, + "timestamp": "2025-10-01 03:29:53.959595", + "step": 7116, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.003690", + "step": 7116, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012624483497347683, + "timestamp": "2025-10-01 03:29:54.006799", + "step": 7117, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.040599", + "step": 7117, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012948090210556984, + "timestamp": "2025-10-01 03:29:54.044137", + "step": 7118, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.078375", + "step": 7118, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011806933209300041, + "timestamp": "2025-10-01 03:29:54.081600", + "step": 7119, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.121336", + "step": 7119, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.804791049333289e-05, + "timestamp": "2025-10-01 03:29:54.146261", + "step": 7120, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.180143", + "step": 7120, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0044301776215434074, + "timestamp": "2025-10-01 03:29:54.183983", + "step": 7121, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:54.220626", + "step": 7121, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.591449396684766e-05, + "timestamp": "2025-10-01 03:29:54.224500", + "step": 7122, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.257556", + "step": 7122, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007490398827940226, + "timestamp": "2025-10-01 03:29:54.262000", + "step": 7123, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:54.296805", + "step": 7123, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002561757282819599, + "timestamp": "2025-10-01 03:29:54.324020", + "step": 7124, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.361661", + "step": 7124, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006777324713766575, + "timestamp": "2025-10-01 03:29:54.364642", + "step": 7125, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.398817", + "step": 7125, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020333859720267355, + "timestamp": "2025-10-01 03:29:54.401753", + "step": 7126, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.436683", + "step": 7126, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030738363042473793, + "timestamp": "2025-10-01 03:29:54.440048", + "step": 7127, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:54.472817", + "step": 7127, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006661901716142893, + "timestamp": "2025-10-01 03:29:54.498029", + "step": 7128, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:54.534427", + "step": 7128, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001110719604184851, + "timestamp": "2025-10-01 03:29:54.538421", + "step": 7129, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.571082", + "step": 7129, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007018303032964468, + "timestamp": "2025-10-01 03:29:54.577318", + "step": 7130, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:54.612192", + "step": 7130, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0067850458435714245, + "timestamp": "2025-10-01 03:29:54.615857", + "step": 7131, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:54.649155", + "step": 7131, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001538729266030714, + "timestamp": "2025-10-01 03:29:54.673909", + "step": 7132, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.706595", + "step": 7132, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.724840593757108e-05, + "timestamp": "2025-10-01 03:29:54.709950", + "step": 7133, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.743222", + "step": 7133, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.0856509662698954e-05, + "timestamp": "2025-10-01 03:29:54.747441", + "step": 7134, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.780226", + "step": 7134, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04809415712952614, + "timestamp": "2025-10-01 03:29:54.783922", + "step": 7135, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:54.820804", + "step": 7135, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001730631454847753, + "timestamp": "2025-10-01 03:29:54.845946", + "step": 7136, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:54.882159", + "step": 7136, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.354275395395234e-05, + "timestamp": "2025-10-01 03:29:54.886632", + "step": 7137, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:54.924271", + "step": 7137, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.6010594562394544e-05, + "timestamp": "2025-10-01 03:29:54.928110", + "step": 7138, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:54.962105", + "step": 7138, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018579578027129173, + "timestamp": "2025-10-01 03:29:54.965749", + "step": 7139, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:55.001740", + "step": 7139, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.8764075220096856e-05, + "timestamp": "2025-10-01 03:29:55.026949", + "step": 7140, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:55.065469", + "step": 7140, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001276919210795313, + "timestamp": "2025-10-01 03:29:55.068867", + "step": 7141, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:55.105934", + "step": 7141, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03883164003491402, + "timestamp": "2025-10-01 03:29:55.109588", + "step": 7142, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:55.143481", + "step": 7142, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001439412881154567, + "timestamp": "2025-10-01 03:29:55.147415", + "step": 7143, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:55.188692", + "step": 7143, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029005780816078186, + "timestamp": "2025-10-01 03:29:55.213836", + "step": 7144, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:29:57.954916", + "step": 7144, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2532041.6103829257, + "timestamp": "2025-10-01 03:29:57.969655", + "step": 7144, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.011415", + "step": 7144, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001827136002248153, + "timestamp": "2025-10-01 03:29:58.028206", + "step": 7145, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:29:58.085983", + "step": 7145, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012645799142774194, + "timestamp": "2025-10-01 03:29:58.099417", + "step": 7146, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:58.154782", + "step": 7146, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002115461975336075, + "timestamp": "2025-10-01 03:29:58.165211", + "step": 7147, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.220474", + "step": 7147, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01205090619623661, + "timestamp": "2025-10-01 03:29:58.253932", + "step": 7148, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.297023", + "step": 7148, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014015778433531523, + "timestamp": "2025-10-01 03:29:58.300158", + "step": 7149, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.357300", + "step": 7149, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014035068452358246, + "timestamp": "2025-10-01 03:29:58.364621", + "step": 7150, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.406344", + "step": 7150, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.0833488785428926e-05, + "timestamp": "2025-10-01 03:29:58.414003", + "step": 7151, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.463215", + "step": 7151, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013553652388509363, + "timestamp": "2025-10-01 03:29:58.488720", + "step": 7152, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.524392", + "step": 7152, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018436965183354914, + "timestamp": "2025-10-01 03:29:58.527076", + "step": 7153, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.567714", + "step": 7153, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027568857185542583, + "timestamp": "2025-10-01 03:29:58.572629", + "step": 7154, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.610319", + "step": 7154, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.96046806499362e-05, + "timestamp": "2025-10-01 03:29:58.613409", + "step": 7155, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.654346", + "step": 7155, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.717983342241496e-05, + "timestamp": "2025-10-01 03:29:58.678538", + "step": 7156, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.724382", + "step": 7156, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.890023360028863e-05, + "timestamp": "2025-10-01 03:29:58.731308", + "step": 7157, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.766709", + "step": 7157, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010780019511003047, + "timestamp": "2025-10-01 03:29:58.772363", + "step": 7158, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.810260", + "step": 7158, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013278782716952264, + "timestamp": "2025-10-01 03:29:58.815179", + "step": 7159, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:58.855375", + "step": 7159, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018339090456720442, + "timestamp": "2025-10-01 03:29:58.884025", + "step": 7160, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:58.927703", + "step": 7160, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002868177543859929, + "timestamp": "2025-10-01 03:29:58.933312", + "step": 7161, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:58.972878", + "step": 7161, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011158118955790997, + "timestamp": "2025-10-01 03:29:58.975658", + "step": 7162, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.014182", + "step": 7162, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007514886558055878, + "timestamp": "2025-10-01 03:29:59.020141", + "step": 7163, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:29:59.060270", + "step": 7163, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002454655186738819, + "timestamp": "2025-10-01 03:29:59.091661", + "step": 7164, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.129854", + "step": 7164, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.197652015136555e-05, + "timestamp": "2025-10-01 03:29:59.137764", + "step": 7165, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.175608", + "step": 7165, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010949772695312276, + "timestamp": "2025-10-01 03:29:59.180744", + "step": 7166, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.217110", + "step": 7166, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.035285867750644684, + "timestamp": "2025-10-01 03:29:59.221130", + "step": 7167, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.263916", + "step": 7167, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.60471443249844e-05, + "timestamp": "2025-10-01 03:29:59.290242", + "step": 7168, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.329742", + "step": 7168, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.608918531332165e-05, + "timestamp": "2025-10-01 03:29:59.335259", + "step": 7169, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.373611", + "step": 7169, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004420013166964054, + "timestamp": "2025-10-01 03:29:59.378642", + "step": 7170, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:29:59.415586", + "step": 7170, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016452744603157043, + "timestamp": "2025-10-01 03:29:59.420526", + "step": 7171, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.457288", + "step": 7171, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.876863360754214e-05, + "timestamp": "2025-10-01 03:29:59.484143", + "step": 7172, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.521418", + "step": 7172, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012503889156505466, + "timestamp": "2025-10-01 03:29:59.526522", + "step": 7173, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.560698", + "step": 7173, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002564939495641738, + "timestamp": "2025-10-01 03:29:59.565258", + "step": 7174, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.602608", + "step": 7174, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011162442155182362, + "timestamp": "2025-10-01 03:29:59.607829", + "step": 7175, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.643869", + "step": 7175, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008158196578733623, + "timestamp": "2025-10-01 03:29:59.667996", + "step": 7176, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.704229", + "step": 7176, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03726668283343315, + "timestamp": "2025-10-01 03:29:59.707019", + "step": 7177, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.748995", + "step": 7177, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011110719060525298, + "timestamp": "2025-10-01 03:29:59.755520", + "step": 7178, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.792133", + "step": 7178, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011564478336367756, + "timestamp": "2025-10-01 03:29:59.797605", + "step": 7179, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.832729", + "step": 7179, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018514669500291348, + "timestamp": "2025-10-01 03:29:59.859519", + "step": 7180, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.918521", + "step": 7180, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003658804052975029, + "timestamp": "2025-10-01 03:29:59.923794", + "step": 7181, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:29:59.971162", + "step": 7181, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02123548462986946, + "timestamp": "2025-10-01 03:29:59.977747", + "step": 7182, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.015536", + "step": 7182, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001281781733268872, + "timestamp": "2025-10-01 03:30:00.021855", + "step": 7183, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.059465", + "step": 7183, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013179165543988347, + "timestamp": "2025-10-01 03:30:00.088398", + "step": 7184, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:30:00.124935", + "step": 7184, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03257348760962486, + "timestamp": "2025-10-01 03:30:00.130777", + "step": 7185, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.171255", + "step": 7185, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019047049863729626, + "timestamp": "2025-10-01 03:30:00.180343", + "step": 7186, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.223885", + "step": 7186, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.240121390670538e-05, + "timestamp": "2025-10-01 03:30:00.231954", + "step": 7187, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:30:00.268988", + "step": 7187, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023918833176139742, + "timestamp": "2025-10-01 03:30:00.299631", + "step": 7188, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.337386", + "step": 7188, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022467222879640758, + "timestamp": "2025-10-01 03:30:00.345338", + "step": 7189, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.390324", + "step": 7189, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.764232966816053e-05, + "timestamp": "2025-10-01 03:30:00.396574", + "step": 7190, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:00.444545", + "step": 7190, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000526757852640003, + "timestamp": "2025-10-01 03:30:00.450760", + "step": 7191, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.495795", + "step": 7191, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001629380276426673, + "timestamp": "2025-10-01 03:30:00.523513", + "step": 7192, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.564529", + "step": 7192, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001261840370716527, + "timestamp": "2025-10-01 03:30:00.573498", + "step": 7193, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.618072", + "step": 7193, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003154147998429835, + "timestamp": "2025-10-01 03:30:00.626229", + "step": 7194, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.679253", + "step": 7194, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017511827172711492, + "timestamp": "2025-10-01 03:30:00.687452", + "step": 7195, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.726593", + "step": 7195, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042804647819139063, + "timestamp": "2025-10-01 03:30:00.758757", + "step": 7196, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.799943", + "step": 7196, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.746629853500053e-05, + "timestamp": "2025-10-01 03:30:00.809232", + "step": 7197, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.864079", + "step": 7197, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014326372183859348, + "timestamp": "2025-10-01 03:30:00.871131", + "step": 7198, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.914228", + "step": 7198, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006844449671916664, + "timestamp": "2025-10-01 03:30:00.923000", + "step": 7199, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:00.972797", + "step": 7199, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031057558953762054, + "timestamp": "2025-10-01 03:30:01.003186", + "step": 7200, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.044518", + "step": 7200, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015974252892192453, + "timestamp": "2025-10-01 03:30:01.051792", + "step": 7201, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.089776", + "step": 7201, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001545247941976413, + "timestamp": "2025-10-01 03:30:01.099596", + "step": 7202, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.138614", + "step": 7202, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045650065294466913, + "timestamp": "2025-10-01 03:30:01.147159", + "step": 7203, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:30:01.185236", + "step": 7203, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004280607681721449, + "timestamp": "2025-10-01 03:30:01.214891", + "step": 7204, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.254583", + "step": 7204, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007987224380485713, + "timestamp": "2025-10-01 03:30:01.262151", + "step": 7205, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.303079", + "step": 7205, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024351666797883809, + "timestamp": "2025-10-01 03:30:01.305784", + "step": 7206, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:01.348303", + "step": 7206, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005880386452190578, + "timestamp": "2025-10-01 03:30:01.352479", + "step": 7207, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.401119", + "step": 7207, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020735694095492363, + "timestamp": "2025-10-01 03:30:01.426603", + "step": 7208, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.460525", + "step": 7208, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015408467152155936, + "timestamp": "2025-10-01 03:30:01.464755", + "step": 7209, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.506510", + "step": 7209, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007477857172489166, + "timestamp": "2025-10-01 03:30:01.511578", + "step": 7210, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.549753", + "step": 7210, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00576540594920516, + "timestamp": "2025-10-01 03:30:01.554164", + "step": 7211, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.594251", + "step": 7211, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044730320223607123, + "timestamp": "2025-10-01 03:30:01.618961", + "step": 7212, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.656075", + "step": 7212, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.63713027886115e-05, + "timestamp": "2025-10-01 03:30:01.662571", + "step": 7213, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.706548", + "step": 7213, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.038533858954906464, + "timestamp": "2025-10-01 03:30:01.710852", + "step": 7214, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.744796", + "step": 7214, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044366318616084754, + "timestamp": "2025-10-01 03:30:01.753665", + "step": 7215, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:01.792449", + "step": 7215, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002750809071585536, + "timestamp": "2025-10-01 03:30:01.819837", + "step": 7216, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.854006", + "step": 7216, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007110475562512875, + "timestamp": "2025-10-01 03:30:01.860468", + "step": 7217, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:01.899315", + "step": 7217, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02140149660408497, + "timestamp": "2025-10-01 03:30:01.904323", + "step": 7218, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:30:01.942247", + "step": 7218, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035237433621659875, + "timestamp": "2025-10-01 03:30:01.954183", + "step": 7219, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:01.989708", + "step": 7219, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008215964771807194, + "timestamp": "2025-10-01 03:30:02.018376", + "step": 7220, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:30:02.065774", + "step": 7220, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011245991481700912, + "timestamp": "2025-10-01 03:30:02.070401", + "step": 7221, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.113905", + "step": 7221, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015886923065409064, + "timestamp": "2025-10-01 03:30:02.118688", + "step": 7222, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.159215", + "step": 7222, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002213534025941044, + "timestamp": "2025-10-01 03:30:02.163371", + "step": 7223, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.210666", + "step": 7223, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002293026482220739, + "timestamp": "2025-10-01 03:30:02.236654", + "step": 7224, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.281120", + "step": 7224, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.657097739866003e-05, + "timestamp": "2025-10-01 03:30:02.285028", + "step": 7225, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-10-01 03:30:02.325468", + "step": 7225, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00528408819809556, + "timestamp": "2025-10-01 03:30:02.329619", + "step": 7226, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.366690", + "step": 7226, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.086196324555203e-05, + "timestamp": "2025-10-01 03:30:02.375874", + "step": 7227, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.416462", + "step": 7227, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010499086929485202, + "timestamp": "2025-10-01 03:30:02.447372", + "step": 7228, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:02.493232", + "step": 7228, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020516056974884123, + "timestamp": "2025-10-01 03:30:02.502941", + "step": 7229, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:02.548754", + "step": 7229, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007066589896567166, + "timestamp": "2025-10-01 03:30:02.558090", + "step": 7230, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.623111", + "step": 7230, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004734946123789996, + "timestamp": "2025-10-01 03:30:02.634774", + "step": 7231, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.678288", + "step": 7231, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004740066826343536, + "timestamp": "2025-10-01 03:30:02.712696", + "step": 7232, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.764081", + "step": 7232, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020822805527132004, + "timestamp": "2025-10-01 03:30:02.768353", + "step": 7233, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.818288", + "step": 7233, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031190566369332373, + "timestamp": "2025-10-01 03:30:02.827290", + "step": 7234, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.867885", + "step": 7234, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008437315002083778, + "timestamp": "2025-10-01 03:30:02.878042", + "step": 7235, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.917248", + "step": 7235, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010150170419365168, + "timestamp": "2025-10-01 03:30:02.946852", + "step": 7236, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:02.986058", + "step": 7236, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007805302157066762, + "timestamp": "2025-10-01 03:30:02.989906", + "step": 7237, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.030504", + "step": 7237, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005591553053818643, + "timestamp": "2025-10-01 03:30:03.039029", + "step": 7238, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.078689", + "step": 7238, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001919764472404495, + "timestamp": "2025-10-01 03:30:03.086890", + "step": 7239, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.130080", + "step": 7239, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012049656361341476, + "timestamp": "2025-10-01 03:30:03.160166", + "step": 7240, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.203342", + "step": 7240, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023508688900619745, + "timestamp": "2025-10-01 03:30:03.213753", + "step": 7241, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.257582", + "step": 7241, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000163990436703898, + "timestamp": "2025-10-01 03:30:03.266966", + "step": 7242, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.311283", + "step": 7242, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001512998715043068, + "timestamp": "2025-10-01 03:30:03.320947", + "step": 7243, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.364813", + "step": 7243, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.320509030250832e-05, + "timestamp": "2025-10-01 03:30:03.393386", + "step": 7244, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.430925", + "step": 7244, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020104900002479553, + "timestamp": "2025-10-01 03:30:03.438746", + "step": 7245, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:03.479152", + "step": 7245, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005832515307702124, + "timestamp": "2025-10-01 03:30:03.488783", + "step": 7246, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.529302", + "step": 7246, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005659735761582851, + "timestamp": "2025-10-01 03:30:03.536319", + "step": 7247, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.577366", + "step": 7247, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002552461810410023, + "timestamp": "2025-10-01 03:30:03.608288", + "step": 7248, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:30:03.647841", + "step": 7248, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010236313100904226, + "timestamp": "2025-10-01 03:30:03.654434", + "step": 7249, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.701843", + "step": 7249, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.939574374584481e-05, + "timestamp": "2025-10-01 03:30:03.707752", + "step": 7250, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.745819", + "step": 7250, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008382050436921418, + "timestamp": "2025-10-01 03:30:03.754151", + "step": 7251, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.796617", + "step": 7251, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010013612918555737, + "timestamp": "2025-10-01 03:30:03.829013", + "step": 7252, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.872035", + "step": 7252, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011906380765140057, + "timestamp": "2025-10-01 03:30:03.883332", + "step": 7253, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:03.934684", + "step": 7253, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010456440941197798, + "timestamp": "2025-10-01 03:30:03.938804", + "step": 7254, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:03.973948", + "step": 7254, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007239511702209711, + "timestamp": "2025-10-01 03:30:03.981876", + "step": 7255, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.023446", + "step": 7255, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020486125722527504, + "timestamp": "2025-10-01 03:30:04.057298", + "step": 7256, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.107877", + "step": 7256, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018649054982233793, + "timestamp": "2025-10-01 03:30:04.111159", + "step": 7257, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.148425", + "step": 7257, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.728506170678884e-05, + "timestamp": "2025-10-01 03:30:04.154149", + "step": 7258, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:04.191673", + "step": 7258, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.152161950012669e-05, + "timestamp": "2025-10-01 03:30:04.199333", + "step": 7259, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.240533", + "step": 7259, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005068095633760095, + "timestamp": "2025-10-01 03:30:04.270693", + "step": 7260, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.313666", + "step": 7260, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002424482721835375, + "timestamp": "2025-10-01 03:30:04.321283", + "step": 7261, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.366784", + "step": 7261, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045504511217586696, + "timestamp": "2025-10-01 03:30:04.375150", + "step": 7262, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:04.413384", + "step": 7262, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009955146815627813, + "timestamp": "2025-10-01 03:30:04.421010", + "step": 7263, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.460247", + "step": 7263, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005814371979795396, + "timestamp": "2025-10-01 03:30:04.489568", + "step": 7264, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.530322", + "step": 7264, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017737429880071431, + "timestamp": "2025-10-01 03:30:04.540470", + "step": 7265, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.581162", + "step": 7265, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008109747432172298, + "timestamp": "2025-10-01 03:30:04.591452", + "step": 7266, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.634957", + "step": 7266, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007840123726055026, + "timestamp": "2025-10-01 03:30:04.642066", + "step": 7267, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.681521", + "step": 7267, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.522551408736035e-05, + "timestamp": "2025-10-01 03:30:04.710619", + "step": 7268, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:30:04.759885", + "step": 7268, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005993967060931027, + "timestamp": "2025-10-01 03:30:04.766802", + "step": 7269, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.818194", + "step": 7269, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.455845818389207e-05, + "timestamp": "2025-10-01 03:30:04.824848", + "step": 7270, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.865061", + "step": 7270, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017697911243885756, + "timestamp": "2025-10-01 03:30:04.871609", + "step": 7271, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 80 + ], + "flops": 2373281365952 + }, + "timestamp": "2025-10-01 03:30:04.921755", + "step": 7271, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005536010023206472, + "timestamp": "2025-10-01 03:30:04.949659", + "step": 7272, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:04.988180", + "step": 7272, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009724675677716732, + "timestamp": "2025-10-01 03:30:04.990879", + "step": 7273, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.025605", + "step": 7273, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024731954908929765, + "timestamp": "2025-10-01 03:30:05.032934", + "step": 7274, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.070918", + "step": 7274, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009564078412950039, + "timestamp": "2025-10-01 03:30:05.077237", + "step": 7275, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.120385", + "step": 7275, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014130461495369673, + "timestamp": "2025-10-01 03:30:05.151613", + "step": 7276, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 112 + ], + "flops": 3322488817984 + }, + "timestamp": "2025-10-01 03:30:05.197127", + "step": 7276, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038377122837118804, + "timestamp": "2025-10-01 03:30:05.208368", + "step": 7277, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.250391", + "step": 7277, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025720198755152524, + "timestamp": "2025-10-01 03:30:05.261102", + "step": 7278, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.302926", + "step": 7278, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001948661811184138, + "timestamp": "2025-10-01 03:30:05.313551", + "step": 7279, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.351729", + "step": 7279, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020920539100188762, + "timestamp": "2025-10-01 03:30:05.379933", + "step": 7280, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.422859", + "step": 7280, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030039416742511094, + "timestamp": "2025-10-01 03:30:05.432337", + "step": 7281, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.470860", + "step": 7281, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020186321809887886, + "timestamp": "2025-10-01 03:30:05.473749", + "step": 7282, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.515799", + "step": 7282, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023064943961799145, + "timestamp": "2025-10-01 03:30:05.523349", + "step": 7283, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.562519", + "step": 7283, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013704532466363162, + "timestamp": "2025-10-01 03:30:05.591712", + "step": 7284, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.630971", + "step": 7284, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012930730008520186, + "timestamp": "2025-10-01 03:30:05.639333", + "step": 7285, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.691189", + "step": 7285, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011818510247394443, + "timestamp": "2025-10-01 03:30:05.699606", + "step": 7286, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.745787", + "step": 7286, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.463981066597626e-05, + "timestamp": "2025-10-01 03:30:05.757301", + "step": 7287, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.799342", + "step": 7287, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003649913123808801, + "timestamp": "2025-10-01 03:30:05.827924", + "step": 7288, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.873360", + "step": 7288, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.686566434334964e-05, + "timestamp": "2025-10-01 03:30:05.880048", + "step": 7289, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.926203", + "step": 7289, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006158145260997117, + "timestamp": "2025-10-01 03:30:05.932236", + "step": 7290, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:05.971246", + "step": 7290, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.849959754617885e-05, + "timestamp": "2025-10-01 03:30:05.977572", + "step": 7291, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:06.018275", + "step": 7291, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05610330030322075, + "timestamp": "2025-10-01 03:30:06.047602", + "step": 7292, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:06.085342", + "step": 7292, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004665467713493854, + "timestamp": "2025-10-01 03:30:06.089307", + "step": 7293, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:06.133544", + "step": 7293, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001442523207515478, + "timestamp": "2025-10-01 03:30:06.139924", + "step": 7294, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:06.181178", + "step": 7294, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018113328842446208, + "timestamp": "2025-10-01 03:30:06.186394", + "step": 7295, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:06.240679", + "step": 7295, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015441712457686663, + "timestamp": "2025-10-01 03:30:06.272010", + "step": 7296, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:30:09.171711", + "step": 7296, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2327153.504939905, + "timestamp": "2025-10-01 03:30:09.177562", + "step": 7296, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.210525", + "step": 7296, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003898380964528769, + "timestamp": "2025-10-01 03:30:09.215151", + "step": 7297, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.252000", + "step": 7297, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011941769625991583, + "timestamp": "2025-10-01 03:30:09.256975", + "step": 7298, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.297332", + "step": 7298, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015088495274540037, + "timestamp": "2025-10-01 03:30:09.306208", + "step": 7299, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.345566", + "step": 7299, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008842440438456833, + "timestamp": "2025-10-01 03:30:09.373342", + "step": 7300, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.408848", + "step": 7300, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.051468513906002045, + "timestamp": "2025-10-01 03:30:09.420156", + "step": 7301, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.460426", + "step": 7301, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021139559976290911, + "timestamp": "2025-10-01 03:30:09.462955", + "step": 7302, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.503702", + "step": 7302, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043817347614094615, + "timestamp": "2025-10-01 03:30:09.510195", + "step": 7303, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.553378", + "step": 7303, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003015237394720316, + "timestamp": "2025-10-01 03:30:09.579000", + "step": 7304, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.632886", + "step": 7304, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002263280184706673, + "timestamp": "2025-10-01 03:30:09.637718", + "step": 7305, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.703903", + "step": 7305, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012932243407703936, + "timestamp": "2025-10-01 03:30:09.712177", + "step": 7306, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 96 + ], + "flops": 2847885091968 + }, + "timestamp": "2025-10-01 03:30:09.751780", + "step": 7306, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.946250065695494e-05, + "timestamp": "2025-10-01 03:30:09.756174", + "step": 7307, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 1, + 80 + ], + "flops": 593517404912 + }, + "timestamp": "2025-10-01 03:30:09.797234", + "step": 7307, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.4741694030817598e-05, + "timestamp": "2025-10-01 03:30:09.823889", + "step": 7308, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 48 + ], + "batch_size": 8, + "flops": 949202279808 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 96 + ], + "batch_size": 8, + "flops": 1898404492032 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 64 + ], + "batch_size": 8, + "flops": 1265603017216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 5, + 80 + ], + "batch_size": 8, + "flops": 1582003754624 + } + ], + "timestamp": "2025-10-01 03:30:12.675661", + "step": 7308, + "epoch": 3 + }, + { + "type": "pplx", + "content": 2313149.2387858387, + "timestamp": "2025-10-01 03:30:12.682622", + "step": 7308, + "epoch": 3 + }, + { + "type": "best_pplx", + "content": 1791683.787394418, + "timestamp": "2025-10-01 03:30:12.689088", + "step": 7308, + "epoch": 3 + }, + { + "type": "best_step", + "content": 5016, + "timestamp": "2025-10-01 03:30:12.695803", + "step": 7308, + "epoch": 3 + }, + { + "type": "total_pplx_flops", + "content": 9808423376665600, + "timestamp": "2025-10-01 03:30:12.702235", + "step": 7308, + "epoch": 3 + }, + { + "type": "total_train_flops", + "content": 20896705064436048, + "timestamp": "2025-10-01 03:30:12.707651", + "step": 7308, + "epoch": 3 + } + ], + "best_evals": { + "pplx": { + "score": 1791683.787394418, + "step": 5016 + }, + "rougel": { + "precision": 0.7100737100737101, + "recall": 0.7100737100737101, + "fmeasure": 0.7100737100737101 + } + } +} \ No newline at end of file