diff --git a/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/config.yaml b/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/config.yaml index 82b409e3f8c89f1ddbaf5f5fe96ad8091f08f7cf..b38d28c2215d5b58671935661dcb7c1ba852830f 100644 --- a/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/config.yaml +++ b/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/config.yaml @@ -30,9 +30,11 @@ _wandb: - 71 - 105 3: + - 2 - 4 - 13 - 14 + - 37 - 42 - 61 4: 3.11.10 diff --git a/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/output.log b/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/output.log index c6b45ea72a7159f0a319538ecc3291daa2711c32..16ad7056f3deee3c3f2427a86c20c97747f43395 100644 --- a/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/output.log +++ b/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/output.log @@ -1,3 +1,196 @@ +FullyShardedDataParallel( + (_fsdp_wrapped_module): Bagel( + (language_model): Qwen2ForCausalLM( + (model): Qwen2Model( + (embed_tokens): Embedding(152064, 3584) + (layers): ModuleList( + (0-27): 28 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): Qwen2MoTDecoderLayer( + (self_attn): PackedAttentionMoT( + (q_proj): Linear(in_features=3584, out_features=3584, bias=True) + (k_proj): Linear(in_features=3584, out_features=512, bias=True) + (v_proj): Linear(in_features=3584, out_features=512, bias=True) + (o_proj): Linear(in_features=3584, out_features=3584, bias=False) + (q_norm): Qwen2RMSNorm((128,), eps=1e-06) + (k_norm): Qwen2RMSNorm((128,), eps=1e-06) + (q_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06) + (k_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06) + (q_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=True) + (k_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True) + (v_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True) + (o_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=False) + ) + (mlp): Qwen2MLP( + (gate_proj): Linear(in_features=3584, out_features=18944, bias=False) + (up_proj): Linear(in_features=3584, out_features=18944, bias=False) + (down_proj): Linear(in_features=18944, out_features=3584, bias=False) + (act_fn): SiLU() + ) + (mlp_moe_gen): Qwen2MLP( + (gate_proj): Linear(in_features=3584, out_features=18944, bias=False) + (up_proj): Linear(in_features=3584, out_features=18944, bias=False) + (down_proj): Linear(in_features=18944, out_features=3584, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06) + (input_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06) + (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06) + (post_attention_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06) + ) + ) + ) + ) + (norm): Qwen2RMSNorm((3584,), eps=1e-06) + (norm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06) + (rotary_emb): Qwen2RotaryEmbedding() + ) + (lm_head): Linear(in_features=3584, out_features=152064, bias=False) + ) + (time_embedder): FullyShardedDataParallel( + (_fsdp_wrapped_module): TimestepEmbedder( + (mlp): Sequential( + (0): Linear(in_features=256, out_features=3584, bias=True) + (1): SiLU() + (2): Linear(in_features=3584, out_features=3584, bias=True) + ) + ) + ) + (vae2llm): Linear(in_features=64, out_features=3584, bias=True) + (llm2vae): Linear(in_features=3584, out_features=64, bias=True) + (latent_pos_embed): FullyShardedDataParallel( + (_fsdp_wrapped_module): PositionEmbedding() + ) + (vit_model): SiglipVisionModel( + (vision_model): FullyShardedDataParallel( + (_fsdp_wrapped_module): SiglipVisionTransformer( + (embeddings): SiglipVisionEmbeddings( + (position_embedding): Embedding(4900, 1152) + (patch_embedding): Linear(in_features=588, out_features=1152, bias=True) + ) + (encoder): SiglipEncoder( + (layers): ModuleList( + (0-25): 26 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): SiglipEncoderLayer( + (self_attn): SiglipFlashAttention2( + (k_proj): Linear(in_features=1152, out_features=1152, bias=True) + (v_proj): Linear(in_features=1152, out_features=1152, bias=True) + (q_proj): Linear(in_features=1152, out_features=1152, bias=True) + (out_proj): Linear(in_features=1152, out_features=1152, bias=True) + ) + (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + (mlp): SiglipMLP( + (activation_fn): PytorchGELUTanh() + (fc1): Linear(in_features=1152, out_features=4304, bias=True) + (fc2): Linear(in_features=4304, out_features=1152, bias=True) + ) + (layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + ) + ) + ) + ) + ) + (post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + ) + ) + ) + (connector): FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): MLPconnector( + (activation_fn): PytorchGELUTanh() + (fc1): Linear(in_features=1152, out_features=3584, bias=True) + (fc2): Linear(in_features=3584, out_features=3584, bias=True) + ) + ) + ) + (vit_pos_embed): FullyShardedDataParallel( + (_fsdp_wrapped_module): PositionEmbedding() + ) + ) +) +_flat_param True +language_model.model.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.26._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +language_model.model.layers.27._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +time_embedder._fsdp_wrapped_module._flat_param True +latent_pos_embed._fsdp_wrapped_module._flat_param False +vit_model.vision_model._fsdp_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_model.vision_model._fsdp_wrapped_module.encoder.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +connector._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True +vit_pos_embed._fsdp_wrapped_module._flat_param False +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only/vlm_gym_match_equation_sos_train +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step0 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.6639004349708557 +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step500 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.013862529769539833 +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step1000 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.011223526671528816 wandb: Detected [huggingface_hub.inference] in use. wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ @@ -1042,213 +1235,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ [2026-01-27 02:22:17] (step=0001031) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, [2026-01-27 02:22:19] (step=0001032) Train Loss mse: 0.0097, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, [2026-01-27 02:22:21] (step=0001033) Train Loss mse: 0.0086, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, -FullyShardedDataParallel( - (_fsdp_wrapped_module): Bagel( - (language_model): Qwen2ForCausalLM( - (model): Qwen2Model( - (embed_tokens): Embedding(152064, 3584) - (layers): ModuleList( - (0-27): 28 x FullyShardedDataParallel( - (_fsdp_wrapped_module): CheckpointWrapper( - (_checkpoint_wrapped_module): Qwen2MoTDecoderLayer( - (self_attn): PackedAttentionMoT( - (q_proj): Linear(in_features=3584, out_features=3584, bias=True) - (k_proj): Linear(in_features=3584, out_features=512, bias=True) - (v_proj): Linear(in_features=3584, out_features=512, bias=True) - (o_proj): Linear(in_features=3584, out_features=3584, bias=False) - (q_norm): Qwen2RMSNorm((128,), eps=1e-06) - (k_norm): Qwen2RMSNorm((128,), eps=1e-06) - (q_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06) - (k_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06) - (q_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=True) - (k_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True) - (v_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True) - (o_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=False) - ) - (mlp): Qwen2MLP( - (gate_proj): Linear(in_features=3584, out_features=18944, bias=False) - (up_proj): Linear(in_features=3584, out_features=18944, bias=False) - (down_proj): Linear(in_features=18944, out_features=3584, bias=False) - (act_fn): SiLU() - ) - (mlp_moe_gen): Qwen2MLP( - (gate_proj): Linear(in_features=3584, out_features=18944, bias=False) - (up_proj): Linear(in_features=3584, out_features=18944, bias=False) - (down_proj): Linear(in_features=18944, out_features=3584, bias=False) - (act_fn): SiLU() - ) - (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06) - (input_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06) - (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06) - (post_attention_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06) - ) - ) - ) - ) - (norm): Qwen2RMSNorm((3584,), eps=1e-06) - (norm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06) - (rotary_emb): Qwen2RotaryEmbedding() - ) - (lm_head): Linear(in_features=3584, out_features=152064, bias=False) - ) - (time_embedder): FullyShardedDataParallel( - (_fsdp_wrapped_module): TimestepEmbedder( - (mlp): Sequential( - (0): Linear(in_features=256, out_features=3584, bias=True) - (1): SiLU() - (2): Linear(in_features=3584, out_features=3584, bias=True) - ) - ) - ) - (vae2llm): Linear(in_features=64, out_features=3584, bias=True) - (llm2vae): Linear(in_features=3584, out_features=64, bias=True) - (latent_pos_embed): FullyShardedDataParallel( - (_fsdp_wrapped_module): PositionEmbedding() - ) - (vit_model): SiglipVisionModel( - (vision_model): FullyShardedDataParallel( - (_fsdp_wrapped_module): SiglipVisionTransformer( - (embeddings): SiglipVisionEmbeddings( - (position_embedding): Embedding(4900, 1152) - (patch_embedding): Linear(in_features=588, out_features=1152, bias=True) - ) - (encoder): SiglipEncoder( - (layers): ModuleList( - (0-25): 26 x FullyShardedDataParallel( - (_fsdp_wrapped_module): CheckpointWrapper( - (_checkpoint_wrapped_module): SiglipEncoderLayer( - (self_attn): SiglipFlashAttention2( - (k_proj): Linear(in_features=1152, out_features=1152, bias=True) - (v_proj): Linear(in_features=1152, out_features=1152, bias=True) - (q_proj): Linear(in_features=1152, out_features=1152, bias=True) - (out_proj): Linear(in_features=1152, out_features=1152, bias=True) - ) - (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) - (mlp): SiglipMLP( - (activation_fn): PytorchGELUTanh() - (fc1): Linear(in_features=1152, out_features=4304, bias=True) - (fc2): Linear(in_features=4304, out_features=1152, bias=True) - ) - (layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) - ) - ) - ) - ) - ) - (post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) - ) - ) - ) - (connector): FullyShardedDataParallel( - (_fsdp_wrapped_module): CheckpointWrapper( - (_checkpoint_wrapped_module): MLPconnector( - (activation_fn): PytorchGELUTanh() - (fc1): Linear(in_features=1152, out_features=3584, bias=True) - (fc2): Linear(in_features=3584, out_features=3584, bias=True) - ) - ) - ) - (vit_pos_embed): FullyShardedDataParallel( - (_fsdp_wrapped_module): PositionEmbedding() - ) - ) -) -_flat_param True -language_model.model.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.26._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -language_model.model.layers.27._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -time_embedder._fsdp_wrapped_module._flat_param True -latent_pos_embed._fsdp_wrapped_module._flat_param False -vit_model.vision_model._fsdp_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_model.vision_model._fsdp_wrapped_module.encoder.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -connector._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True -vit_pos_embed._fsdp_wrapped_module._flat_param False -Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only/vlm_gym_match_equation_sos_train -base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step0 -Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val -[eval debug] first 3 batch fingerprints: - fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] -ce_avg: 0.0, mse_avg: 0.6639004349708557 -base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step500 -Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val -[eval debug] first 3 batch fingerprints: - fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] -ce_avg: 0.0, mse_avg: 0.013862529769539833 -base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step1000 -Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val -[eval debug] first 3 batch fingerprints: - fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] -ce_avg: 0.0, mse_avg: 0.011223526671528816 -base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step1500 -Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val -[eval debug] first 3 batch fingerprints: - fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] -ce_avg: 0.0, mse_avg: 0.009812634438276291 -base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step2000 -Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val -[eval debug] first 3 batch fingerprints: - fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] -ce_avg: 0.0, mse_avg: 0.010432669892907143 [2026-01-27 02:22:22] (step=0001034) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, [2026-01-27 02:22:24] (step=0001035) Train Loss mse: 0.0096, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, [2026-01-27 02:22:25] (step=0001036) Train Loss mse: 0.0097, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, @@ -2494,7 +2480,42 @@ ce_avg: 0.0, mse_avg: 0.010432669892907143 [2026-01-27 02:55:35] (step=0002276) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, [2026-01-27 02:55:37] (step=0002277) Train Loss mse: 0.0080, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, [2026-01-27 02:55:38] (step=0002278) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.60, -[2026-01-27 02:55:40] (step=0002279) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 02:55:40 +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step1500 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.009812634438276291 +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step2000 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.010432669892907143 +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step2500 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.01046404242515564 +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step3000 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.01004165131598711 +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step3500 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +[2026-01-27 03:30:10] (step=0003555) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, [2026-01-27 02:55:41] (step=0002280) Train Loss mse: 0.0083, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, [2026-01-27 02:55:43] (step=0002281) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, [2026-01-27 02:55:44] (step=0002282) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, @@ -2686,13 +2707,6 @@ ce_avg: 0.0, mse_avg: 0.010432669892907143 [2026-01-27 03:00:48] (step=0002468) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, [2026-01-27 03:00:50] (step=0002469) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.53, [2026-01-27 03:00:52] (step=0002470) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, -base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step2500 -Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val -[eval debug] first 3 batch fingerprints: - fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] - fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] -ce_avg: 0.0, mse_avg: 0.01046404242515564 [2026-01-27 03:00:53] (step=0002471) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.60, [2026-01-27 03:00:55] (step=0002472) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, [2026-01-27 03:00:56] (step=0002473) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, @@ -2967,4 +2981,2284 @@ ce_avg: 0.0, mse_avg: 0.01046404242515564 [2026-01-27 03:08:10] (step=0002742) Train Loss mse: 0.0090, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, [2026-01-27 03:08:11] (step=0002743) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, [2026-01-27 03:08:13] (step=0002744) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, -[2026-01-27 03:08:14] (step=0002745) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, \ No newline at end of file +[2026-01-27 03:08:14] (step=0002745) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:16] (step=0002746) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:17] (step=0002747) Train Loss mse: 0.0028, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:08:19] (step=0002748) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:08:21] (step=0002749) Train Loss mse: 0.0128, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:08:23] (step=0002750) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:08:24] (step=0002751) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:08:26] (step=0002752) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:27] (step=0002753) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:29] (step=0002754) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:30] (step=0002755) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:32] (step=0002756) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:34] (step=0002757) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:08:35] (step=0002758) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:37] (step=0002759) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:08:39] (step=0002760) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:08:40] (step=0002761) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:42] (step=0002762) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:43] (step=0002763) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:45] (step=0002764) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:46] (step=0002765) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.66, +[2026-01-27 03:08:48] (step=0002766) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:08:50] (step=0002767) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:08:52] (step=0002768) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:08:54] (step=0002769) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:08:55] (step=0002770) Train Loss mse: 0.0083, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:57] (step=0002771) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:08:58] (step=0002772) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:00] (step=0002773) Train Loss mse: 0.0102, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:09:01] (step=0002774) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:09:03] (step=0002775) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:05] (step=0002776) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:09:07] (step=0002777) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:09:08] (step=0002778) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:10] (step=0002779) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:11] (step=0002780) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:13] (step=0002781) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:09:14] (step=0002782) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:16] (step=0002783) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:09:18] (step=0002784) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:09:20] (step=0002785) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:09:21] (step=0002786) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:09:23] (step=0002787) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:24] (step=0002788) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:26] (step=0002789) Train Loss mse: 0.0091, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:09:27] (step=0002790) Train Loss mse: 0.0075, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:29] (step=0002791) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:09:31] (step=0002792) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:33] (step=0002793) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:09:34] (step=0002794) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:09:36] (step=0002795) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:37] (step=0002796) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:09:39] (step=0002797) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:41] (step=0002798) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:09:42] (step=0002799) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:09:44] (step=0002800) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:09:46] (step=0002801) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:09:47] (step=0002802) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:49] (step=0002803) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:50] (step=0002804) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:09:52] (step=0002805) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:54] (step=0002806) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:09:55] (step=0002807) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:57] (step=0002808) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:09:59] (step=0002809) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:10:00] (step=0002810) Train Loss mse: 0.0087, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:02] (step=0002811) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:10:03] (step=0002812) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:05] (step=0002813) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:10:07] (step=0002814) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:10:08] (step=0002815) Train Loss mse: 0.0080, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:10] (step=0002816) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:10:12] (step=0002817) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:13] (step=0002818) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:14] (step=0002819) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:16] (step=0002820) Train Loss mse: 0.0090, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:18] (step=0002821) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:10:19] (step=0002822) Train Loss mse: 0.0092, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:21] (step=0002823) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:23] (step=0002824) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:10:24] (step=0002825) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:26] (step=0002826) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:27] (step=0002827) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:29] (step=0002828) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:30] (step=0002829) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:32] (step=0002830) Train Loss mse: 0.0503, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:10:34] (step=0002831) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:10:35] (step=0002832) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:37] (step=0002833) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:38] (step=0002834) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:40] (step=0002835) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:41] (step=0002836) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:43] (step=0002837) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:44] (step=0002838) Train Loss mse: 0.0075, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:46] (step=0002839) Train Loss mse: 0.0077, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:10:48] (step=0002840) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:10:50] (step=0002841) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:10:51] (step=0002842) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:53] (step=0002843) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:54] (step=0002844) Train Loss mse: 0.0081, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:55] (step=0002845) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:57] (step=0002846) Train Loss mse: 0.0192, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:10:58] (step=0002847) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:00] (step=0002848) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:11:02] (step=0002849) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:11:04] (step=0002850) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:11:05] (step=0002851) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:11:07] (step=0002852) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:11:08] (step=0002853) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:10] (step=0002854) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:11] (step=0002855) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:13] (step=0002856) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:11:14] (step=0002857) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:11:16] (step=0002858) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:11:18] (step=0002859) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:11:19] (step=0002860) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:21] (step=0002861) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:22] (step=0002862) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:24] (step=0002863) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:25] (step=0002864) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:27] (step=0002865) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:11:29] (step=0002866) Train Loss mse: 0.0075, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:11:30] (step=0002867) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:32] (step=0002868) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:11:34] (step=0002869) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:35] (step=0002870) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:11:37] (step=0002871) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:38] (step=0002872) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:40] (step=0002873) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:11:41] (step=0002874) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:43] (step=0002875) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:11:45] (step=0002876) Train Loss mse: 0.0081, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:11:47] (step=0002877) Train Loss mse: 0.0099, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:11:48] (step=0002878) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:50] (step=0002879) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:51] (step=0002880) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:11:53] (step=0002881) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:54] (step=0002882) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:56] (step=0002883) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:11:58] (step=0002884) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:11:59] (step=0002885) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:01] (step=0002886) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:02] (step=0002887) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:04] (step=0002888) Train Loss mse: 0.0077, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:06] (step=0002889) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:07] (step=0002890) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:12:09] (step=0002891) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:10] (step=0002892) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:12] (step=0002893) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:14] (step=0002894) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:12:16] (step=0002895) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:17] (step=0002896) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:19] (step=0002897) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:20] (step=0002898) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:12:22] (step=0002899) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:23] (step=0002900) Train Loss mse: 0.0114, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:12:25] (step=0002901) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:27] (step=0002902) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:12:28] (step=0002903) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:30] (step=0002904) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:32] (step=0002905) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:12:33] (step=0002906) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:35] (step=0002907) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:36] (step=0002908) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:38] (step=0002909) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:40] (step=0002910) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:12:41] (step=0002911) Train Loss mse: 0.0075, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:43] (step=0002912) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:44] (step=0002913) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:12:46] (step=0002914) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:48] (step=0002915) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:49] (step=0002916) Train Loss mse: 0.0097, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:51] (step=0002917) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:52] (step=0002918) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:12:54] (step=0002919) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:56] (step=0002920) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:12:57] (step=0002921) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:12:59] (step=0002922) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:01] (step=0002923) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:13:02] (step=0002924) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:04] (step=0002925) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:13:05] (step=0002926) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.66, +[2026-01-27 03:13:07] (step=0002927) Train Loss mse: 0.0080, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:08] (step=0002928) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:10] (step=0002929) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:13:12] (step=0002930) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:13:13] (step=0002931) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:15] (step=0002932) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:16] (step=0002933) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:13:18] (step=0002934) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:19] (step=0002935) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:21] (step=0002936) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:22] (step=0002937) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:24] (step=0002938) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:13:26] (step=0002939) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:13:28] (step=0002940) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:13:29] (step=0002941) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:31] (step=0002942) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:13:32] (step=0002943) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:34] (step=0002944) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:35] (step=0002945) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:37] (step=0002946) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:39] (step=0002947) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:13:41] (step=0002948) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:13:42] (step=0002949) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:44] (step=0002950) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:13:46] (step=0002951) Train Loss mse: 0.0127, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:47] (step=0002952) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:13:49] (step=0002953) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:50] (step=0002954) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:52] (step=0002955) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:13:54] (step=0002956) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:13:56] (step=0002957) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:13:57] (step=0002958) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:13:59] (step=0002959) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:14:00] (step=0002960) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:02] (step=0002961) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:14:04] (step=0002962) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:05] (step=0002963) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:07] (step=0002964) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:14:09] (step=0002965) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:14:10] (step=0002966) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:14:12] (step=0002967) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:14] (step=0002968) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:14:15] (step=0002969) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:17] (step=0002970) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:14:18] (step=0002971) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:20] (step=0002972) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:14:22] (step=0002973) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:14:23] (step=0002974) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:14:25] (step=0002975) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:14:27] (step=0002976) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:28] (step=0002977) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:14:30] (step=0002978) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:14:32] (step=0002979) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:14:33] (step=0002980) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:35] (step=0002981) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:14:37] (step=0002982) Train Loss mse: 0.0080, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:14:39] (step=0002983) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:14:40] (step=0002984) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:14:41] (step=0002985) Train Loss mse: 0.0081, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:43] (step=0002986) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:14:45] (step=0002987) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:14:47] (step=0002988) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:48] (step=0002989) Train Loss mse: 0.0088, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:14:50] (step=0002990) Train Loss mse: 0.0095, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:52] (step=0002991) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:14:53] (step=0002992) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:55] (step=0002993) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:14:56] (step=0002994) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.60, +[2026-01-27 03:14:58] (step=0002995) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:15:00] (step=0002996) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:01] (step=0002997) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:15:03] (step=0002998) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:05] (step=0002999) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:15:11] (step=0003000) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.15, +[2026-01-27 03:15:13] (step=0003001) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:14] (step=0003002) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.66, +[2026-01-27 03:15:16] (step=0003003) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:17] (step=0003004) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:19] (step=0003005) Train Loss mse: 0.0077, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:21] (step=0003006) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:15:22] (step=0003007) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:15:24] (step=0003008) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:15:25] (step=0003009) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:15:27] (step=0003010) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:28] (step=0003011) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:30] (step=0003012) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:15:31] (step=0003013) Train Loss mse: 0.0089, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:15:33] (step=0003014) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:15:35] (step=0003015) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:36] (step=0003016) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:37] (step=0003017) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:39] (step=0003018) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:15:40] (step=0003019) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:42] (step=0003020) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:15:43] (step=0003021) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:45] (step=0003022) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:46] (step=0003023) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:15:48] (step=0003024) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:15:50] (step=0003025) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:51] (step=0003026) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:53] (step=0003027) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:54] (step=0003028) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:56] (step=0003029) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:15:57] (step=0003030) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:15:59] (step=0003031) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:00] (step=0003032) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:02] (step=0003033) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:16:04] (step=0003034) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:16:05] (step=0003035) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:07] (step=0003036) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:08] (step=0003037) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:09] (step=0003038) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:11] (step=0003039) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:12] (step=0003040) Train Loss mse: 0.0080, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:14] (step=0003041) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:16] (step=0003042) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:16:18] (step=0003043) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:16:19] (step=0003044) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:21] (step=0003045) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:22] (step=0003046) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:24] (step=0003047) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:25] (step=0003048) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:16:26] (step=0003049) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:28] (step=0003050) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:30] (step=0003051) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:16:32] (step=0003052) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:16:33] (step=0003053) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:35] (step=0003054) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:36] (step=0003055) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:38] (step=0003056) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:39] (step=0003057) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:41] (step=0003058) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:42] (step=0003059) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:16:44] (step=0003060) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:16:46] (step=0003061) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:16:47] (step=0003062) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:49] (step=0003063) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:50] (step=0003064) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:52] (step=0003065) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:53] (step=0003066) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:55] (step=0003067) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:16:56] (step=0003068) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:16:58] (step=0003069) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:17:00] (step=0003070) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:17:01] (step=0003071) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:03] (step=0003072) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:04] (step=0003073) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:06] (step=0003074) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:07] (step=0003075) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:09] (step=0003076) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:17:11] (step=0003077) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:17:12] (step=0003078) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:17:14] (step=0003079) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:17:15] (step=0003080) Train Loss mse: 0.0113, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:17] (step=0003081) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:18] (step=0003082) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:20] (step=0003083) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:17:22] (step=0003084) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:23] (step=0003085) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:17:25] (step=0003086) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:17:27] (step=0003087) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:28] (step=0003088) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:29] (step=0003089) Train Loss mse: 0.0107, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:31] (step=0003090) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:33] (step=0003091) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:17:34] (step=0003092) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:36] (step=0003093) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:17:38] (step=0003094) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:17:39] (step=0003095) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:41] (step=0003096) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:42] (step=0003097) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:44] (step=0003098) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:17:45] (step=0003099) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:47] (step=0003100) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:48] (step=0003101) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:17:50] (step=0003102) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:17:52] (step=0003103) Train Loss mse: 0.0087, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:53] (step=0003104) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:55] (step=0003105) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:17:56] (step=0003106) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:17:58] (step=0003107) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:00] (step=0003108) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:18:01] (step=0003109) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:03] (step=0003110) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:18:04] (step=0003111) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:06] (step=0003112) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:08] (step=0003113) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:18:09] (step=0003114) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:10] (step=0003115) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:12] (step=0003116) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:18:14] (step=0003117) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:18:15] (step=0003118) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:18:17] (step=0003119) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:18] (step=0003120) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:20] (step=0003121) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:21] (step=0003122) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:23] (step=0003123) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:18:25] (step=0003124) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:26] (step=0003125) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:28] (step=0003126) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:30] (step=0003127) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:18:31] (step=0003128) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:32] (step=0003129) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:34] (step=0003130) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:18:35] (step=0003131) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:18:37] (step=0003132) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:18:39] (step=0003133) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:18:40] (step=0003134) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:42] (step=0003135) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:43] (step=0003136) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:45] (step=0003137) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:18:47] (step=0003138) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:48] (step=0003139) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:50] (step=0003140) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:52] (step=0003141) Train Loss mse: 0.0092, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:18:53] (step=0003142) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:18:55] (step=0003143) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:56] (step=0003144) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:58] (step=0003145) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:18:59] (step=0003146) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:19:01] (step=0003147) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:02] (step=0003148) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:04] (step=0003149) Train Loss mse: 0.0099, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:06] (step=0003150) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:19:08] (step=0003151) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:19:09] (step=0003152) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:11] (step=0003153) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:12] (step=0003154) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:14] (step=0003155) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:19:15] (step=0003156) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:17] (step=0003157) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:19] (step=0003158) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:19:20] (step=0003159) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:19:22] (step=0003160) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:19:23] (step=0003161) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:25] (step=0003162) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:26] (step=0003163) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:28] (step=0003164) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:19:30] (step=0003165) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:31] (step=0003166) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:33] (step=0003167) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:19:35] (step=0003168) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:19:36] (step=0003169) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:19:38] (step=0003170) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:39] (step=0003171) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:41] (step=0003172) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:42] (step=0003173) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:19:44] (step=0003174) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:46] (step=0003175) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:19:47] (step=0003176) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:19:49] (step=0003177) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:51] (step=0003178) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:19:52] (step=0003179) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:54] (step=0003180) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:19:55] (step=0003181) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:19:57] (step=0003182) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:19:59] (step=0003183) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:00] (step=0003184) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:20:02] (step=0003185) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:04] (step=0003186) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:20:05] (step=0003187) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:07] (step=0003188) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:08] (step=0003189) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:20:10] (step=0003190) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:20:11] (step=0003191) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:13] (step=0003192) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:20:15] (step=0003193) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:16] (step=0003194) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:20:18] (step=0003195) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:19] (step=0003196) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:21] (step=0003197) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:20:23] (step=0003198) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:24] (step=0003199) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:26] (step=0003200) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:20:27] (step=0003201) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:29] (step=0003202) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:20:31] (step=0003203) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:32] (step=0003204) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:34] (step=0003205) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:20:36] (step=0003206) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:20:37] (step=0003207) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:20:39] (step=0003208) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:40] (step=0003209) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:42] (step=0003210) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:20:43] (step=0003211) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:20:45] (step=0003212) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:20:47] (step=0003213) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:48] (step=0003214) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:20:50] (step=0003215) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:20:52] (step=0003216) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:53] (step=0003217) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:20:55] (step=0003218) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:56] (step=0003219) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:20:58] (step=0003220) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:20:59] (step=0003221) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:02] (step=0003222) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:21:03] (step=0003223) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:04] (step=0003224) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:06] (step=0003225) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:21:08] (step=0003226) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:09] (step=0003227) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:11] (step=0003228) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:12] (step=0003229) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:14] (step=0003230) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:21:16] (step=0003231) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:21:18] (step=0003232) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:21:19] (step=0003233) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:21] (step=0003234) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:22] (step=0003235) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:23] (step=0003236) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:25] (step=0003237) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:26] (step=0003238) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:28] (step=0003239) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:21:30] (step=0003240) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:21:32] (step=0003241) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:21:33] (step=0003242) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:21:35] (step=0003243) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:36] (step=0003244) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:38] (step=0003245) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:39] (step=0003246) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:41] (step=0003247) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:42] (step=0003248) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:44] (step=0003249) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:21:46] (step=0003250) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:21:48] (step=0003251) Train Loss mse: 0.0075, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:21:49] (step=0003252) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:51] (step=0003253) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:52] (step=0003254) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:54] (step=0003255) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:55] (step=0003256) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:21:57] (step=0003257) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:21:59] (step=0003258) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:22:01] (step=0003259) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:22:03] (step=0003260) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:22:04] (step=0003261) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:22:06] (step=0003262) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:07] (step=0003263) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:22:09] (step=0003264) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:22:10] (step=0003265) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:12] (step=0003266) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:22:14] (step=0003267) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:22:16] (step=0003268) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:22:17] (step=0003269) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:22:19] (step=0003270) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:20] (step=0003271) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:22] (step=0003272) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:23] (step=0003273) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:25] (step=0003274) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:22:27] (step=0003275) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:22:29] (step=0003276) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:22:31] (step=0003277) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:22:32] (step=0003278) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:22:34] (step=0003279) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:35] (step=0003280) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:37] (step=0003281) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:22:38] (step=0003282) Train Loss mse: 0.0179, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:40] (step=0003283) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:22:42] (step=0003284) Train Loss mse: 0.0096, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:22:43] (step=0003285) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:22:45] (step=0003286) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:22:47] (step=0003287) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:22:48] (step=0003288) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:50] (step=0003289) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:22:52] (step=0003290) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:22:53] (step=0003291) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:22:55] (step=0003292) Train Loss mse: 0.0095, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:22:57] (step=0003293) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:22:59] (step=0003294) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:23:00] (step=0003295) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:23:02] (step=0003296) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:23:03] (step=0003297) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:05] (step=0003298) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:07] (step=0003299) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:23:08] (step=0003300) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:10] (step=0003301) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:23:12] (step=0003302) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:13] (step=0003303) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:23:15] (step=0003304) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:23:17] (step=0003305) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:18] (step=0003306) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:23:20] (step=0003307) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:23:22] (step=0003308) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:23] (step=0003309) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:23:25] (step=0003310) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:27] (step=0003311) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:23:28] (step=0003312) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:23:30] (step=0003313) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:32] (step=0003314) Train Loss mse: 0.0080, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:23:33] (step=0003315) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:35] (step=0003316) Train Loss mse: 0.0120, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:23:36] (step=0003317) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:38] (step=0003318) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:40] (step=0003319) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:23:41] (step=0003320) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:43] (step=0003321) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:23:45] (step=0003322) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:23:46] (step=0003323) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:48] (step=0003324) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:23:50] (step=0003325) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:51] (step=0003326) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:23:53] (step=0003327) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:23:54] (step=0003328) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:56] (step=0003329) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:23:58] (step=0003330) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:24:00] (step=0003331) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:24:01] (step=0003332) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:24:03] (step=0003333) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:05] (step=0003334) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:24:06] (step=0003335) Train Loss mse: 0.0159, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:07] (step=0003336) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:09] (step=0003337) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:10] (step=0003338) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:12] (step=0003339) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:24:14] (step=0003340) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:24:16] (step=0003341) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.44, +[2026-01-27 03:24:18] (step=0003342) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:19] (step=0003343) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:21] (step=0003344) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:22] (step=0003345) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:24] (step=0003346) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:25] (step=0003347) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:24:27] (step=0003348) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:24:29] (step=0003349) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:24:31] (step=0003350) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:24:33] (step=0003351) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:24:34] (step=0003352) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:36] (step=0003353) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:37] (step=0003354) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:39] (step=0003355) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:40] (step=0003356) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:24:42] (step=0003357) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:24:44] (step=0003358) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:24:46] (step=0003359) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:24:47] (step=0003360) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:24:49] (step=0003361) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:50] (step=0003362) Train Loss mse: 0.0116, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:52] (step=0003363) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:53] (step=0003364) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:24:55] (step=0003365) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:24:57] (step=0003366) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:24:58] (step=0003367) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:00] (step=0003368) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:25:02] (step=0003369) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:25:03] (step=0003370) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:05] (step=0003371) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:25:06] (step=0003372) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:08] (step=0003373) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:25:10] (step=0003374) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:11] (step=0003375) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:13] (step=0003376) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:25:15] (step=0003377) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:25:17] (step=0003378) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:18] (step=0003379) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:20] (step=0003380) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:25:21] (step=0003381) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:25:23] (step=0003382) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:24] (step=0003383) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:26] (step=0003384) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:28] (step=0003385) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:30] (step=0003386) Train Loss mse: 0.0075, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:25:31] (step=0003387) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:33] (step=0003388) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:25:35] (step=0003389) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:36] (step=0003390) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.60, +[2026-01-27 03:25:38] (step=0003391) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:39] (step=0003392) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:41] (step=0003393) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:43] (step=0003394) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:25:45] (step=0003395) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:25:46] (step=0003396) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:48] (step=0003397) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:50] (step=0003398) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:51] (step=0003399) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:25:53] (step=0003400) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:54] (step=0003401) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:25:56] (step=0003402) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:25:58] (step=0003403) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:26:00] (step=0003404) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:01] (step=0003405) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:03] (step=0003406) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:04] (step=0003407) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:06] (step=0003408) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:08] (step=0003409) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:26:10] (step=0003410) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:26:11] (step=0003411) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:13] (step=0003412) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:15] (step=0003413) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:16] (step=0003414) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:18] (step=0003415) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:20] (step=0003416) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:21] (step=0003417) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:23] (step=0003418) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:26:24] (step=0003419) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:27] (step=0003420) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.45, +[2026-01-27 03:26:28] (step=0003421) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:30] (step=0003422) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:31] (step=0003423) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:26:33] (step=0003424) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:26:35] (step=0003425) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:26:36] (step=0003426) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:26:38] (step=0003427) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:39] (step=0003428) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:41] (step=0003429) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:26:43] (step=0003430) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:26:45] (step=0003431) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:46] (step=0003432) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:48] (step=0003433) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:26:50] (step=0003434) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:26:51] (step=0003435) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:26:53] (step=0003436) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:54] (step=0003437) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:26:56] (step=0003438) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:26:58] (step=0003439) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:00] (step=0003440) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:27:01] (step=0003441) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:03] (step=0003442) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:05] (step=0003443) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:27:06] (step=0003444) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:08] (step=0003445) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:09] (step=0003446) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:11] (step=0003447) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:27:12] (step=0003448) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:14] (step=0003449) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:27:16] (step=0003450) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:27:18] (step=0003451) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:19] (step=0003452) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:21] (step=0003453) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:27:22] (step=0003454) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:24] (step=0003455) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:27:26] (step=0003456) Train Loss mse: 0.0091, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:27] (step=0003457) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:29] (step=0003458) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:31] (step=0003459) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:27:33] (step=0003460) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:34] (step=0003461) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:27:36] (step=0003462) Train Loss mse: 0.0026, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:27:37] (step=0003463) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:39] (step=0003464) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:27:41] (step=0003465) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:27:42] (step=0003466) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:44] (step=0003467) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:46] (step=0003468) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:27:48] (step=0003469) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:49] (step=0003470) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:51] (step=0003471) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:27:53] (step=0003472) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:27:54] (step=0003473) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:27:56] (step=0003474) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:57] (step=0003475) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:27:59] (step=0003476) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:01] (step=0003477) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:28:03] (step=0003478) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:28:04] (step=0003479) Train Loss mse: 0.0081, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:28:06] (step=0003480) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:28:07] (step=0003481) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:09] (step=0003482) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:28:11] (step=0003483) Train Loss mse: 0.0122, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:12] (step=0003484) Train Loss mse: 0.0077, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:28:14] (step=0003485) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:28:16] (step=0003486) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:28:18] (step=0003487) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:28:19] (step=0003488) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:28:21] (step=0003489) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:28:23] (step=0003490) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:24] (step=0003491) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:28:26] (step=0003492) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:28:27] (step=0003493) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:29] (step=0003494) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:28:31] (step=0003495) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:28:33] (step=0003496) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:28:35] (step=0003497) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:28:36] (step=0003498) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:38] (step=0003499) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:28:45] (step=0003500) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.15, +[2026-01-27 03:28:46] (step=0003501) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:47] (step=0003502) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:49] (step=0003503) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:50] (step=0003504) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:28:52] (step=0003505) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:28:54] (step=0003506) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.54, +[2026-01-27 03:28:56] (step=0003507) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:28:57] (step=0003508) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:28:58] (step=0003509) Train Loss mse: 0.0130, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:00] (step=0003510) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:29:01] (step=0003511) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:03] (step=0003512) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:04] (step=0003513) Train Loss mse: 0.0088, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:06] (step=0003514) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:29:08] (step=0003515) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:09] (step=0003516) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:11] (step=0003517) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:12] (step=0003518) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:14] (step=0003519) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:15] (step=0003520) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:29:17] (step=0003521) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:18] (step=0003522) Train Loss mse: 0.0081, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:20] (step=0003523) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:22] (step=0003524) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:29:23] (step=0003525) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:25] (step=0003526) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:26] (step=0003527) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:28] (step=0003528) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:29] (step=0003529) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:31] (step=0003530) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:32] (step=0003531) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:29:33] (step=0003532) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:35] (step=0003533) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:29:37] (step=0003534) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:29:38] (step=0003535) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:40] (step=0003536) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:41] (step=0003537) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:43] (step=0003538) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:44] (step=0003539) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:46] (step=0003540) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:47] (step=0003541) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:29:49] (step=0003542) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:29:51] (step=0003543) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:29:53] (step=0003544) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:54] (step=0003545) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:55] (step=0003546) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:57] (step=0003547) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:29:58] (step=0003548) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:00] (step=0003549) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:01] (step=0003550) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:03] (step=0003551) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:30:05] (step=0003552) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:30:07] (step=0003553) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:08] (step=0003554) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:10] (step=0003555) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:11] (step=0003556) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:12] (step=0003557) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:14] (step=0003558) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:16] (step=0003559) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:30:17] (step=0003560) Train Loss mse: 0.0080, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:30:19] (step=0003561) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:30:21] (step=0003562) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:22] (step=0003563) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:24] (step=0003564) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:25] (step=0003565) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:30:27] (step=0003566) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:28] (step=0003567) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:30] (step=0003568) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:30:32] (step=0003569) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:30:33] (step=0003570) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:30:35] (step=0003571) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:36] (step=0003572) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:38] (step=0003573) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step4000 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.010126573964953423 +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step4500 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.00975842121988535 +[2026-01-27 03:30:39] (step=0003574) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:41] (step=0003575) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:42] (step=0003576) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:30:44] (step=0003577) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:30:46] (step=0003578) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:30:47] (step=0003579) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:49] (step=0003580) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:50] (step=0003581) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:52] (step=0003582) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:53] (step=0003583) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:30:55] (step=0003584) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:30:57] (step=0003585) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:30:58] (step=0003586) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:31:00] (step=0003587) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:01] (step=0003588) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:03] (step=0003589) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:04] (step=0003590) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:06] (step=0003591) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:31:08] (step=0003592) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:09] (step=0003593) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:31:11] (step=0003594) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:31:12] (step=0003595) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:14] (step=0003596) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:15] (step=0003597) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:31:17] (step=0003598) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:31:19] (step=0003599) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:20] (step=0003600) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:22] (step=0003601) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:31:23] (step=0003602) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:31:25] (step=0003603) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:26] (step=0003604) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:31:28] (step=0003605) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:30] (step=0003606) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:31:31] (step=0003607) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:33] (step=0003608) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:31:35] (step=0003609) Train Loss mse: 0.0092, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:31:36] (step=0003610) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:31:37] (step=0003611) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:31:39] (step=0003612) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:41] (step=0003613) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:31:42] (step=0003614) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:44] (step=0003615) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:45] (step=0003616) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:31:47] (step=0003617) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:31:49] (step=0003618) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:50] (step=0003619) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:52] (step=0003620) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:53] (step=0003621) Train Loss mse: 0.0086, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:55] (step=0003622) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:31:57] (step=0003623) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:31:58] (step=0003624) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:00] (step=0003625) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:01] (step=0003626) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:03] (step=0003627) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:32:04] (step=0003628) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:06] (step=0003629) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:07] (step=0003630) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:09] (step=0003631) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:10] (step=0003632) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:32:12] (step=0003633) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:32:14] (step=0003634) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:15] (step=0003635) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:32:17] (step=0003636) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:32:18] (step=0003637) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:20] (step=0003638) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:21] (step=0003639) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:23] (step=0003640) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:24] (step=0003641) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:32:26] (step=0003642) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:32:28] (step=0003643) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:29] (step=0003644) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:31] (step=0003645) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:32] (step=0003646) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:32:34] (step=0003647) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:35] (step=0003648) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:32:37] (step=0003649) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:39] (step=0003650) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:32:40] (step=0003651) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:32:42] (step=0003652) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:43] (step=0003653) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:45] (step=0003654) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:46] (step=0003655) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:32:48] (step=0003656) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:49] (step=0003657) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:51] (step=0003658) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:32:53] (step=0003659) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:54] (step=0003660) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:32:56] (step=0003661) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:57] (step=0003662) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:32:59] (step=0003663) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:33:01] (step=0003664) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:33:02] (step=0003665) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:03] (step=0003666) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:05] (step=0003667) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.54, +[2026-01-27 03:33:07] (step=0003668) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:08] (step=0003669) Train Loss mse: 0.0029, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:33:10] (step=0003670) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:11] (step=0003671) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:13] (step=0003672) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:33:15] (step=0003673) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:33:16] (step=0003674) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:18] (step=0003675) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:33:19] (step=0003676) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:21] (step=0003677) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:23] (step=0003678) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:33:24] (step=0003679) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:26] (step=0003680) Train Loss mse: 0.0097, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:27] (step=0003681) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:33:29] (step=0003682) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:33:31] (step=0003683) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:32] (step=0003684) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:34] (step=0003685) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:35] (step=0003686) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:33:37] (step=0003687) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:38] (step=0003688) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:40] (step=0003689) Train Loss mse: 0.0096, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:33:42] (step=0003690) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:33:43] (step=0003691) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:45] (step=0003692) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:46] (step=0003693) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:48] (step=0003694) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:33:49] (step=0003695) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:51] (step=0003696) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:33:53] (step=0003697) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:33:54] (step=0003698) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:33:56] (step=0003699) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:57] (step=0003700) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:33:59] (step=0003701) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:00] (step=0003702) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:34:02] (step=0003703) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:04] (step=0003704) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:34:06] (step=0003705) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.54, +[2026-01-27 03:34:07] (step=0003706) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:08] (step=0003707) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:10] (step=0003708) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:11] (step=0003709) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:13] (step=0003710) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:34:15] (step=0003711) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:34:17] (step=0003712) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:34:18] (step=0003713) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:20] (step=0003714) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:21] (step=0003715) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:23] (step=0003716) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:24] (step=0003717) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:34:26] (step=0003718) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:28] (step=0003719) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:34:29] (step=0003720) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:31] (step=0003721) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:32] (step=0003722) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:34:34] (step=0003723) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:35] (step=0003724) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:37] (step=0003725) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:34:38] (step=0003726) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:40] (step=0003727) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:41] (step=0003728) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:43] (step=0003729) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:34:45] (step=0003730) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:46] (step=0003731) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:34:48] (step=0003732) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:34:50] (step=0003733) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:51] (step=0003734) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:53] (step=0003735) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:54] (step=0003736) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:56] (step=0003737) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:34:57] (step=0003738) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:34:59] (step=0003739) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:01] (step=0003740) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:35:02] (step=0003741) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:04] (step=0003742) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:35:05] (step=0003743) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:35:07] (step=0003744) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:35:08] (step=0003745) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:35:10] (step=0003746) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:35:11] (step=0003747) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:13] (step=0003748) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:35:15] (step=0003749) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:35:17] (step=0003750) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:18] (step=0003751) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:35:20] (step=0003752) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:21] (step=0003753) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:35:23] (step=0003754) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:35:24] (step=0003755) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:26] (step=0003756) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:28] (step=0003757) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:35:30] (step=0003758) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:35:31] (step=0003759) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:35:33] (step=0003760) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:35:34] (step=0003761) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.66, +[2026-01-27 03:35:36] (step=0003762) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:37] (step=0003763) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:39] (step=0003764) Train Loss mse: 0.0028, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:40] (step=0003765) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:42] (step=0003766) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:35:44] (step=0003767) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:35:45] (step=0003768) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:47] (step=0003769) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:35:49] (step=0003770) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:50] (step=0003771) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:52] (step=0003772) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:35:53] (step=0003773) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:35:55] (step=0003774) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:35:57] (step=0003775) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:35:58] (step=0003776) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:00] (step=0003777) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:36:01] (step=0003778) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:03] (step=0003779) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:04] (step=0003780) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:06] (step=0003781) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:36:08] (step=0003782) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:10] (step=0003783) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:36:11] (step=0003784) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:13] (step=0003785) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:14] (step=0003786) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:36:16] (step=0003787) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:17] (step=0003788) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:19] (step=0003789) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:36:21] (step=0003790) Train Loss mse: 0.0085, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:23] (step=0003791) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:36:24] (step=0003792) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:25] (step=0003793) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:27] (step=0003794) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:36:29] (step=0003795) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:31] (step=0003796) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:36:32] (step=0003797) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:34] (step=0003798) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:36:35] (step=0003799) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:36:37] (step=0003800) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:39] (step=0003801) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:36:40] (step=0003802) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:42] (step=0003803) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:43] (step=0003804) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:36:45] (step=0003805) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:47] (step=0003806) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:36:48] (step=0003807) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:50] (step=0003808) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:36:51] (step=0003809) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:36:53] (step=0003810) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:55] (step=0003811) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:36:56] (step=0003812) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:36:58] (step=0003813) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:37:00] (step=0003814) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:37:01] (step=0003815) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:37:03] (step=0003816) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:37:04] (step=0003817) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:06] (step=0003818) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:07] (step=0003819) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:09] (step=0003820) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:11] (step=0003821) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:37:12] (step=0003822) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:14] (step=0003823) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:15] (step=0003824) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:37:17] (step=0003825) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:18] (step=0003826) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:20] (step=0003827) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:21] (step=0003828) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:23] (step=0003829) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:25] (step=0003830) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:37:27] (step=0003831) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:37:28] (step=0003832) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:30] (step=0003833) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:37:31] (step=0003834) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:33] (step=0003835) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:34] (step=0003836) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:37:36] (step=0003837) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:37:37] (step=0003838) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:37:39] (step=0003839) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:37:41] (step=0003840) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:37:43] (step=0003841) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:37:44] (step=0003842) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:46] (step=0003843) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:47] (step=0003844) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:49] (step=0003845) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:50] (step=0003846) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:52] (step=0003847) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:37:54] (step=0003848) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:37:55] (step=0003849) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:37:57] (step=0003850) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:37:59] (step=0003851) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:00] (step=0003852) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:02] (step=0003853) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:03] (step=0003854) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:04] (step=0003855) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:06] (step=0003856) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:38:08] (step=0003857) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:38:10] (step=0003858) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:38:11] (step=0003859) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:38:13] (step=0003860) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:14] (step=0003861) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:16] (step=0003862) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:38:17] (step=0003863) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:19] (step=0003864) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:21] (step=0003865) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:38:22] (step=0003866) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:38:24] (step=0003867) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:38:26] (step=0003868) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:38:27] (step=0003869) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:29] (step=0003870) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:30] (step=0003871) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:32] (step=0003872) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:33] (step=0003873) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:38:35] (step=0003874) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:38:37] (step=0003875) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:39] (step=0003876) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:38:40] (step=0003877) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:42] (step=0003878) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:43] (step=0003879) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:38:45] (step=0003880) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:38:46] (step=0003881) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:48] (step=0003882) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:38:50] (step=0003883) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:51] (step=0003884) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:38:53] (step=0003885) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:38:55] (step=0003886) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:38:56] (step=0003887) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:38:58] (step=0003888) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:38:59] (step=0003889) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:01] (step=0003890) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:39:02] (step=0003891) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:39:04] (step=0003892) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:39:06] (step=0003893) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:39:08] (step=0003894) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:09] (step=0003895) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:39:11] (step=0003896) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:12] (step=0003897) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:14] (step=0003898) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:39:16] (step=0003899) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:17] (step=0003900) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:39:19] (step=0003901) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:20] (step=0003902) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:22] (step=0003903) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:39:23] (step=0003904) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:25] (step=0003905) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:39:27] (step=0003906) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:28] (step=0003907) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:30] (step=0003908) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:39:31] (step=0003909) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:33] (step=0003910) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:39:35] (step=0003911) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:36] (step=0003912) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:38] (step=0003913) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:39:39] (step=0003914) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:41] (step=0003915) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:39:43] (step=0003916) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:44] (step=0003917) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:46] (step=0003918) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:39:47] (step=0003919) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:49] (step=0003920) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:39:51] (step=0003921) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:52] (step=0003922) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:54] (step=0003923) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:39:55] (step=0003924) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:39:57] (step=0003925) Train Loss mse: 0.0086, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:39:58] (step=0003926) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:00] (step=0003927) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:01] (step=0003928) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:03] (step=0003929) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:40:05] (step=0003930) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:40:06] (step=0003931) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:08] (step=0003932) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:10] (step=0003933) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:40:11] (step=0003934) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:13] (step=0003935) Train Loss mse: 0.0077, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:14] (step=0003936) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:40:16] (step=0003937) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:17] (step=0003938) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:40:19] (step=0003939) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:40:21] (step=0003940) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:40:22] (step=0003941) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:24] (step=0003942) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:40:25] (step=0003943) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:27] (step=0003944) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:28] (step=0003945) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:30] (step=0003946) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:32] (step=0003947) Train Loss mse: 0.0095, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:40:34] (step=0003948) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:40:35] (step=0003949) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:40:37] (step=0003950) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:38] (step=0003951) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:40] (step=0003952) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:40:42] (step=0003953) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:43] (step=0003954) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:45] (step=0003955) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:40:46] (step=0003956) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:48] (step=0003957) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:40:50] (step=0003958) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:40:51] (step=0003959) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:53] (step=0003960) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:55] (step=0003961) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:40:56] (step=0003962) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:58] (step=0003963) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:40:59] (step=0003964) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:41:01] (step=0003965) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:41:03] (step=0003966) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:41:05] (step=0003967) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:41:06] (step=0003968) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:07] (step=0003969) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:09] (step=0003970) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:41:11] (step=0003971) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:41:12] (step=0003972) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:41:14] (step=0003973) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:16] (step=0003974) Train Loss mse: 0.0089, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:41:18] (step=0003975) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.50, +[2026-01-27 03:41:19] (step=0003976) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:21] (step=0003977) Train Loss mse: 0.0088, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:22] (step=0003978) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:41:24] (step=0003979) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:41:25] (step=0003980) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:27] (step=0003981) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:29] (step=0003982) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:41:31] (step=0003983) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:41:32] (step=0003984) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:41:34] (step=0003985) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:35] (step=0003986) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:37] (step=0003987) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:41:39] (step=0003988) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:40] (step=0003989) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:41:42] (step=0003990) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:44] (step=0003991) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:41:45] (step=0003992) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:41:47] (step=0003993) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:48] (step=0003994) Train Loss mse: 0.0085, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:41:50] (step=0003995) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:41:52] (step=0003996) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:53] (step=0003997) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:41:55] (step=0003998) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:41:57] (step=0003999) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:42:03] (step=0004000) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.15, +[2026-01-27 03:42:05] (step=0004001) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:06] (step=0004002) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:08] (step=0004003) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:09] (step=0004004) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:42:11] (step=0004005) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:13] (step=0004006) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:42:14] (step=0004007) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:42:16] (step=0004008) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:17] (step=0004009) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:19] (step=0004010) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:20] (step=0004011) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:22] (step=0004012) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:23] (step=0004013) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:25] (step=0004014) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:42:27] (step=0004015) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:28] (step=0004016) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:42:30] (step=0004017) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:31] (step=0004018) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:33] (step=0004019) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:34] (step=0004020) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:35] (step=0004021) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:37] (step=0004022) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:38] (step=0004023) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:40] (step=0004024) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:42:42] (step=0004025) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:43] (step=0004026) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:45] (step=0004027) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:42:46] (step=0004028) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:48] (step=0004029) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:49] (step=0004030) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:51] (step=0004031) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:52] (step=0004032) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:54] (step=0004033) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:42:56] (step=0004034) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:42:57] (step=0004035) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:42:59] (step=0004036) Train Loss mse: 0.0080, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:00] (step=0004037) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:02] (step=0004038) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:03] (step=0004039) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:05] (step=0004040) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:06] (step=0004041) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:08] (step=0004042) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:43:10] (step=0004043) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:43:11] (step=0004044) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:13] (step=0004045) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:14] (step=0004046) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:16] (step=0004047) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:17] (step=0004048) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:19] (step=0004049) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:20] (step=0004050) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:22] (step=0004051) Train Loss mse: 0.0098, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:43:24] (step=0004052) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:43:26] (step=0004053) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:27] (step=0004054) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:28] (step=0004055) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:30] (step=0004056) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:31] (step=0004057) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:33] (step=0004058) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:35] (step=0004059) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:43:36] (step=0004060) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:43:38] (step=0004061) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:43:40] (step=0004062) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:41] (step=0004063) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:42] (step=0004064) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:44] (step=0004065) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:45] (step=0004066) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:47] (step=0004067) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:49] (step=0004068) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:43:50] (step=0004069) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:43:52] (step=0004070) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:43:54] (step=0004071) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:55] (step=0004072) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:43:57] (step=0004073) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:43:58] (step=0004074) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:00] (step=0004075) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:01] (step=0004076) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:44:03] (step=0004077) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:05] (step=0004078) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:06] (step=0004079) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:08] (step=0004080) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:09] (step=0004081) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:11] (step=0004082) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:12] (step=0004083) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:44:14] (step=0004084) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:16] (step=0004085) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:17] (step=0004086) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:19] (step=0004087) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:20] (step=0004088) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:22] (step=0004089) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:23] (step=0004090) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:25] (step=0004091) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:44:26] (step=0004092) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:28] (step=0004093) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:30] (step=0004094) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:31] (step=0004095) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:33] (step=0004096) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:34] (step=0004097) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:36] (step=0004098) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:44:38] (step=0004099) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:39] (step=0004100) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:41] (step=0004101) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:42] (step=0004102) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:44:44] (step=0004103) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:45] (step=0004104) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:44:47] (step=0004105) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:49] (step=0004106) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:44:50] (step=0004107) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:44:52] (step=0004108) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:53] (step=0004109) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:44:55] (step=0004110) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:56] (step=0004111) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:44:58] (step=0004112) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:00] (step=0004113) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:45:01] (step=0004114) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:45:03] (step=0004115) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:04] (step=0004116) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:45:06] (step=0004117) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:45:08] (step=0004118) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:09] (step=0004119) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:10] (step=0004120) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:12] (step=0004121) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:13] (step=0004122) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:15] (step=0004123) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:45:17] (step=0004124) Train Loss mse: 0.0134, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:18] (step=0004125) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:20] (step=0004126) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:22] (step=0004127) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:45:23] (step=0004128) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:25] (step=0004129) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:26] (step=0004130) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:27] (step=0004131) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:45:29] (step=0004132) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:45:31] (step=0004133) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:45:32] (step=0004134) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:34] (step=0004135) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:36] (step=0004136) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:45:37] (step=0004137) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:45:39] (step=0004138) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:40] (step=0004139) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:42] (step=0004140) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:43] (step=0004141) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:45:45] (step=0004142) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:45:46] (step=0004143) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:48] (step=0004144) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:45:49] (step=0004145) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:51] (step=0004146) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:45:53] (step=0004147) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:54] (step=0004148) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:56] (step=0004149) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:45:57] (step=0004150) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:45:59] (step=0004151) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:46:01] (step=0004152) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:02] (step=0004153) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:46:04] (step=0004154) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:05] (step=0004155) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:46:07] (step=0004156) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:08] (step=0004157) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:10] (step=0004158) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:46:12] (step=0004159) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:46:14] (step=0004160) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:46:15] (step=0004161) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:46:17] (step=0004162) Train Loss mse: 0.0087, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:18] (step=0004163) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:20] (step=0004164) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:21] (step=0004165) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:23] (step=0004166) Train Loss mse: 0.0118, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:24] (step=0004167) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:46:26] (step=0004168) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:28] (step=0004169) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:29] (step=0004170) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:31] (step=0004171) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:32] (step=0004172) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:34] (step=0004173) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:46:35] (step=0004174) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:37] (step=0004175) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:46:39] (step=0004176) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:40] (step=0004177) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:42] (step=0004178) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:44] (step=0004179) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:45] (step=0004180) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:46:47] (step=0004181) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:49] (step=0004182) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:46:50] (step=0004183) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:52] (step=0004184) Train Loss mse: 0.0029, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:53] (step=0004185) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:55] (step=0004186) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:46:56] (step=0004187) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:46:58] (step=0004188) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:00] (step=0004189) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:47:01] (step=0004190) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:47:03] (step=0004191) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:05] (step=0004192) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:06] (step=0004193) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:08] (step=0004194) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:09] (step=0004195) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:11] (step=0004196) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:47:13] (step=0004197) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:47:14] (step=0004198) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:47:16] (step=0004199) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:17] (step=0004200) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:19] (step=0004201) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:20] (step=0004202) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:22] (step=0004203) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:24] (step=0004204) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:25] (step=0004205) Train Loss mse: 0.0096, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:47:27] (step=0004206) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:29] (step=0004207) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:30] (step=0004208) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:32] (step=0004209) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:33] (step=0004210) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:35] (step=0004211) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:47:37] (step=0004212) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:47:38] (step=0004213) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:40] (step=0004214) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:41] (step=0004215) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:43] (step=0004216) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:45] (step=0004217) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:46] (step=0004218) Train Loss mse: 0.0083, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:48] (step=0004219) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:47:49] (step=0004220) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:51] (step=0004221) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:53] (step=0004222) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:47:54] (step=0004223) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:56] (step=0004224) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:47:57] (step=0004225) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:47:59] (step=0004226) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:00] (step=0004227) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:02] (step=0004228) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:04] (step=0004229) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:48:05] (step=0004230) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:07] (step=0004231) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:48:09] (step=0004232) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:48:10] (step=0004233) Train Loss mse: 0.0077, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:48:12] (step=0004234) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:13] (step=0004235) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:15] (step=0004236) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:16] (step=0004237) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:18] (step=0004238) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:19] (step=0004239) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:48:21] (step=0004240) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:48:23] (step=0004241) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:48:24] (step=0004242) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:48:26] (step=0004243) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:27] (step=0004244) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:29] (step=0004245) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:30] (step=0004246) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:32] (step=0004247) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:34] (step=0004248) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:48:35] (step=0004249) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:48:37] (step=0004250) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:48:39] (step=0004251) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:48:40] (step=0004252) Train Loss mse: 0.0083, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:42] (step=0004253) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:43] (step=0004254) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:45] (step=0004255) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:46] (step=0004256) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:48] (step=0004257) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:48:49] (step=0004258) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:51] (step=0004259) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:48:53] (step=0004260) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:48:54] (step=0004261) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:56] (step=0004262) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:57] (step=0004263) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:48:59] (step=0004264) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:00] (step=0004265) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:02] (step=0004266) Train Loss mse: 0.0075, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:49:04] (step=0004267) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:49:05] (step=0004268) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:07] (step=0004269) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:49:09] (step=0004270) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:10] (step=0004271) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:12] (step=0004272) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:13] (step=0004273) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:15] (step=0004274) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:49:17] (step=0004275) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:49:18] (step=0004276) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:49:20] (step=0004277) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:49:21] (step=0004278) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:23] (step=0004279) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:24] (step=0004280) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:26] (step=0004281) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:49:28] (step=0004282) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:29] (step=0004283) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:31] (step=0004284) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:49:32] (step=0004285) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:34] (step=0004286) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:49:36] (step=0004287) Train Loss mse: 0.0086, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:49:37] (step=0004288) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:39] (step=0004289) Train Loss mse: 0.0023, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:49:40] (step=0004290) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:42] (step=0004291) Train Loss mse: 0.0079, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:49:44] (step=0004292) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:49:45] (step=0004293) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:49:47] (step=0004294) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:49:48] (step=0004295) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:50] (step=0004296) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:49:52] (step=0004297) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:53] (step=0004298) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:49:55] (step=0004299) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:49:57] (step=0004300) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:49:58] (step=0004301) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:50:00] (step=0004302) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:01] (step=0004303) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:03] (step=0004304) Train Loss mse: 0.0027, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:50:05] (step=0004305) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:06] (step=0004306) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:50:08] (step=0004307) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:09] (step=0004308) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:50:11] (step=0004309) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:50:13] (step=0004310) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:14] (step=0004311) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:50:16] (step=0004312) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:17] (step=0004313) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:19] (step=0004314) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:50:21] (step=0004315) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:22] (step=0004316) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:50:24] (step=0004317) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:25] (step=0004318) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:27] (step=0004319) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:28] (step=0004320) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:30] (step=0004321) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:50:32] (step=0004322) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:34] (step=0004323) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:50:35] (step=0004324) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:50:37] (step=0004325) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:38] (step=0004326) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:40] (step=0004327) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:41] (step=0004328) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:43] (step=0004329) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:44] (step=0004330) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:50:46] (step=0004331) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:50:48] (step=0004332) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:49] (step=0004333) Train Loss mse: 0.0085, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:51] (step=0004334) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:52] (step=0004335) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:54] (step=0004336) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:55] (step=0004337) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:57] (step=0004338) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:50:59] (step=0004339) Train Loss mse: 0.0073, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:51:00] (step=0004340) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:51:02] (step=0004341) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.51, +[2026-01-27 03:51:04] (step=0004342) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:05] (step=0004343) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:07] (step=0004344) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:08] (step=0004345) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:10] (step=0004346) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:11] (step=0004347) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:13] (step=0004348) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:51:15] (step=0004349) Train Loss mse: 0.0088, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:51:16] (step=0004350) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:51:18] (step=0004351) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:51:19] (step=0004352) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:21] (step=0004353) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:22] (step=0004354) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:24] (step=0004355) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:26] (step=0004356) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:51:27] (step=0004357) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:29] (step=0004358) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:51:31] (step=0004359) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:51:32] (step=0004360) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:51:34] (step=0004361) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:35] (step=0004362) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:37] (step=0004363) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:38] (step=0004364) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:40] (step=0004365) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:51:42] (step=0004366) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:43] (step=0004367) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:51:45] (step=0004368) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:51:47] (step=0004369) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:51:48] (step=0004370) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:50] (step=0004371) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:51] (step=0004372) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:53] (step=0004373) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:51:55] (step=0004374) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:56] (step=0004375) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:51:58] (step=0004376) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 03:52:00] (step=0004377) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:01] (step=0004378) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:52:03] (step=0004379) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:05] (step=0004380) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:52:06] (step=0004381) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:52:08] (step=0004382) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:09] (step=0004383) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:11] (step=0004384) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:52:13] (step=0004385) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:52:15] (step=0004386) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:52:16] (step=0004387) Train Loss mse: 0.0029, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:18] (step=0004388) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:52:20] (step=0004389) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:52:21] (step=0004390) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:52:22] (step=0004391) Train Loss mse: 0.0075, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:24] (step=0004392) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:52:26] (step=0004393) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:52:27] (step=0004394) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:29] (step=0004395) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:52:31] (step=0004396) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:52:33] (step=0004397) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 03:52:34] (step=0004398) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:52:36] (step=0004399) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:37] (step=0004400) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:52:39] (step=0004401) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:40] (step=0004402) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:42] (step=0004403) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 03:52:44] (step=0004404) Train Loss mse: 0.0082, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:45] (step=0004405) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:47] (step=0004406) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:48] (step=0004407) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:50] (step=0004408) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:52:51] (step=0004409) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:53] (step=0004410) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:52:55] (step=0004411) Train Loss mse: 0.0090, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:52:57] (step=0004412) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:52:58] (step=0004413) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:00] (step=0004414) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:01] (step=0004415) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:53:03] (step=0004416) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:04] (step=0004417) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:06] (step=0004418) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:53:07] (step=0004419) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:09] (step=0004420) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:53:11] (step=0004421) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:53:12] (step=0004422) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:14] (step=0004423) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:53:16] (step=0004424) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:17] (step=0004425) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:53:19] (step=0004426) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:20] (step=0004427) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:22] (step=0004428) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:24] (step=0004429) Train Loss mse: 0.0088, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:53:25] (step=0004430) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:53:27] (step=0004431) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:28] (step=0004432) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:30] (step=0004433) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:53:31] (step=0004434) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:33] (step=0004435) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:34] (step=0004436) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:36] (step=0004437) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:38] (step=0004438) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:53:39] (step=0004439) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:53:41] (step=0004440) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:53:43] (step=0004441) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:44] (step=0004442) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:46] (step=0004443) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:53:47] (step=0004444) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:49] (step=0004445) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:50] (step=0004446) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:52] (step=0004447) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:53:54] (step=0004448) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:53:56] (step=0004449) Train Loss mse: 0.0117, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:53:57] (step=0004450) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:53:58] (step=0004451) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:00] (step=0004452) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:02] (step=0004453) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:54:03] (step=0004454) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:05] (step=0004455) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:54:06] (step=0004456) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:08] (step=0004457) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:54:10] (step=0004458) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:54:11] (step=0004459) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:13] (step=0004460) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:14] (step=0004461) Train Loss mse: 0.0550, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:54:16] (step=0004462) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:54:17] (step=0004463) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:19] (step=0004464) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:54:21] (step=0004465) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:23] (step=0004466) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:54:24] (step=0004467) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:54:26] (step=0004468) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:27] (step=0004469) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:29] (step=0004470) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:30] (step=0004471) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:54:32] (step=0004472) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:54:34] (step=0004473) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:35] (step=0004474) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:54:37] (step=0004475) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:54:39] (step=0004476) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:40] (step=0004477) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:42] (step=0004478) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:43] (step=0004479) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:54:45] (step=0004480) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:54:46] (step=0004481) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:48] (step=0004482) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:54:50] (step=0004483) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:51] (step=0004484) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:54:53] (step=0004485) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:54] (step=0004486) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:56] (step=0004487) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:54:58] (step=0004488) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:54:59] (step=0004489) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:55:01] (step=0004490) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:55:03] (step=0004491) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:04] (step=0004492) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:55:06] (step=0004493) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:08] (step=0004494) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:55:09] (step=0004495) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:11] (step=0004496) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:55:12] (step=0004497) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:14] (step=0004498) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.57, +[2026-01-27 03:55:16] (step=0004499) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:55:22] (step=0004500) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.15, +[2026-01-27 03:55:24] (step=0004501) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:25] (step=0004502) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:27] (step=0004503) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:28] (step=0004504) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:30] (step=0004505) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:32] (step=0004506) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:55:33] (step=0004507) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:55:35] (step=0004508) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:36] (step=0004509) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:38] (step=0004510) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:39] (step=0004511) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:41] (step=0004512) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:42] (step=0004513) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:44] (step=0004514) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:55:46] (step=0004515) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:47] (step=0004516) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:55:49] (step=0004517) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:50] (step=0004518) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:51] (step=0004519) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:53] (step=0004520) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:54] (step=0004521) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:56] (step=0004522) Train Loss mse: 0.0119, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:57] (step=0004523) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:55:59] (step=0004524) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:56:01] (step=0004525) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:02] (step=0004526) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:04] (step=0004527) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:05] (step=0004528) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:07] (step=0004529) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:08] (step=0004530) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:10] (step=0004531) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:11] (step=0004532) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:13] (step=0004533) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:56:15] (step=0004534) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:56:16] (step=0004535) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:18] (step=0004536) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:19] (step=0004537) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:21] (step=0004538) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:22] (step=0004539) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:24] (step=0004540) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:25] (step=0004541) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:27] (step=0004542) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:56:29] (step=0004543) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:56:30] (step=0004544) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:32] (step=0004545) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:33] (step=0004546) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:34] (step=0004547) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:36] (step=0004548) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:37] (step=0004549) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:39] (step=0004550) Train Loss mse: 0.0116, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:41] (step=0004551) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:56:42] (step=0004552) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:56:44] (step=0004553) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:45] (step=0004554) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.66, +[2026-01-27 03:56:47] (step=0004555) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:48] (step=0004556) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:50] (step=0004557) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:51] (step=0004558) Train Loss mse: 0.0101, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:53] (step=0004559) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:56:55] (step=0004560) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:56] (step=0004561) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:56:58] (step=0004562) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:56:59] (step=0004563) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:01] (step=0004564) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:02] (step=0004565) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:04] (step=0004566) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:05] (step=0004567) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:57:07] (step=0004568) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:57:08] (step=0004569) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:10] (step=0004570) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:57:12] (step=0004571) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:13] (step=0004572) Train Loss mse: 0.0029, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:15] (step=0004573) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:16] (step=0004574) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:17] (step=0004575) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:19] (step=0004576) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:57:21] (step=0004577) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:22] (step=0004578) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:57:24] (step=0004579) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:25] (step=0004580) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:27] (step=0004581) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:28] (step=0004582) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:30] (step=0004583) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:57:32] (step=0004584) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:33] (step=0004585) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:35] (step=0004586) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:57:36] (step=0004587) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:38] (step=0004588) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:39] (step=0004589) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:41] (step=0004590) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:43] (step=0004591) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:57:44] (step=0004592) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:45] (step=0004593) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:47] (step=0004594) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:57:49] (step=0004595) Train Loss mse: 0.0175, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:50] (step=0004596) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:52] (step=0004597) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:53] (step=0004598) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:57:55] (step=0004599) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:56] (step=0004600) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:57:58] (step=0004601) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:00] (step=0004602) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:58:01] (step=0004603) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:02] (step=0004604) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:04] (step=0004605) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.66, +[2026-01-27 03:58:06] (step=0004606) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.54, +[2026-01-27 03:58:07] (step=0004607) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:09] (step=0004608) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:11] (step=0004609) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:58:12] (step=0004610) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:13] (step=0004611) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:15] (step=0004612) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:17] (step=0004613) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:58:18] (step=0004614) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:20] (step=0004615) Train Loss mse: 0.0070, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:21] (step=0004616) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:23] (step=0004617) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:58:24] (step=0004618) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:26] (step=0004619) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:27] (step=0004620) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:29] (step=0004621) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:58:30] (step=0004622) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:32] (step=0004623) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:58:34] (step=0004624) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:35] (step=0004625) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:58:37] (step=0004626) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:38] (step=0004627) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:58:40] (step=0004628) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:41] (step=0004629) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:43] (step=0004630) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:44] (step=0004631) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:46] (step=0004632) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:58:47] (step=0004633) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:49] (step=0004634) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:50] (step=0004635) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:52] (step=0004636) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:58:54] (step=0004637) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:55] (step=0004638) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:58:57] (step=0004639) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:58:58] (step=0004640) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:00] (step=0004641) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:59:02] (step=0004642) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:03] (step=0004643) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:05] (step=0004644) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:59:06] (step=0004645) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:08] (step=0004646) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:59:09] (step=0004647) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:11] (step=0004648) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:59:12] (step=0004649) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:14] (step=0004650) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 03:59:16] (step=0004651) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 03:59:17] (step=0004652) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:19] (step=0004653) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:20] (step=0004654) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:22] (step=0004655) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:59:23] (step=0004656) Train Loss mse: 0.0088, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:25] (step=0004657) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:27] (step=0004658) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:59:28] (step=0004659) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:59:30] (step=0004660) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:31] (step=0004661) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:33] (step=0004662) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:34] (step=0004663) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:36] (step=0004664) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:59:37] (step=0004665) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:39] (step=0004666) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:41] (step=0004667) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 03:59:42] (step=0004668) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:59:44] (step=0004669) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:45] (step=0004670) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:47] (step=0004671) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:48] (step=0004672) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 03:59:50] (step=0004673) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:51] (step=0004674) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:53] (step=0004675) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 03:59:55] (step=0004676) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 03:59:56] (step=0004677) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:58] (step=0004678) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 03:59:59] (step=0004679) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:01] (step=0004680) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:03] (step=0004681) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:00:04] (step=0004682) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:00:06] (step=0004683) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:07] (step=0004684) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:00:09] (step=0004685) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:10] (step=0004686) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:12] (step=0004687) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:13] (step=0004688) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:15] (step=0004689) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:00:17] (step=0004690) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:00:18] (step=0004691) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:20] (step=0004692) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.60, +[2026-01-27 04:00:22] (step=0004693) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:23] (step=0004694) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:24] (step=0004695) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:26] (step=0004696) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:00:28] (step=0004697) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:00:29] (step=0004698) Train Loss mse: 0.0083, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:31] (step=0004699) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:33] (step=0004700) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:00:34] (step=0004701) Train Loss mse: 0.0090, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:36] (step=0004702) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:37] (step=0004703) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:39] (step=0004704) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:00:40] (step=0004705) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:00:42] (step=0004706) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:44] (step=0004707) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:00:45] (step=0004708) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:47] (step=0004709) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:48] (step=0004710) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:50] (step=0004711) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:51] (step=0004712) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:00:53] (step=0004713) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:54] (step=0004714) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:56] (step=0004715) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:00:57] (step=0004716) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:00:59] (step=0004717) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:00] (step=0004718) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:02] (step=0004719) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:01:04] (step=0004720) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:05] (step=0004721) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:07] (step=0004722) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 04:01:09] (step=0004723) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:10] (step=0004724) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:12] (step=0004725) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:13] (step=0004726) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:14] (step=0004727) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:16] (step=0004728) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:18] (step=0004729) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:01:19] (step=0004730) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:21] (step=0004731) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:01:23] (step=0004732) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:01:24] (step=0004733) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:26] (step=0004734) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:27] (step=0004735) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:28] (step=0004736) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:30] (step=0004737) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:31] (step=0004738) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:33] (step=0004739) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:01:35] (step=0004740) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 04:01:37] (step=0004741) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:01:38] (step=0004742) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:40] (step=0004743) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:41] (step=0004744) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:43] (step=0004745) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:44] (step=0004746) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 04:01:46] (step=0004747) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:47] (step=0004748) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:01:49] (step=0004749) Train Loss mse: 0.0027, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 04:01:51] (step=0004750) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:01:53] (step=0004751) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:54] (step=0004752) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:56] (step=0004753) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:57] (step=0004754) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:01:59] (step=0004755) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:00] (step=0004756) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:02] (step=0004757) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:02:03] (step=0004758) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:02:05] (step=0004759) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:02:07] (step=0004760) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:08] (step=0004761) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:10] (step=0004762) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:11] (step=0004763) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:13] (step=0004764) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:14] (step=0004765) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:16] (step=0004766) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 04:02:18] (step=0004767) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 04:02:19] (step=0004768) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:21] (step=0004769) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:22] (step=0004770) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:24] (step=0004771) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:25] (step=0004772) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:27] (step=0004773) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:29] (step=0004774) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:02:31] (step=0004775) Train Loss mse: 0.0084, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 04:02:32] (step=0004776) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:02:34] (step=0004777) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:35] (step=0004778) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 04:02:37] (step=0004779) Train Loss mse: 0.0026, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:38] (step=0004780) Train Loss mse: 0.0071, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:40] (step=0004781) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:02:41] (step=0004782) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:43] (step=0004783) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:02:45] (step=0004784) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:02:46] (step=0004785) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:48] (step=0004786) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:49] (step=0004787) Train Loss mse: 0.0029, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:51] (step=0004788) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:52] (step=0004789) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:02:54] (step=0004790) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:56] (step=0004791) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:02:57] (step=0004792) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:02:59] (step=0004793) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:00] (step=0004794) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:02] (step=0004795) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:03] (step=0004796) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:03:05] (step=0004797) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:06] (step=0004798) Train Loss mse: 0.0054, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:08] (step=0004799) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 04:03:10] (step=0004800) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:11] (step=0004801) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:13] (step=0004802) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:14] (step=0004803) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:16] (step=0004804) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:03:17] (step=0004805) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:19] (step=0004806) Train Loss mse: 0.0064, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:03:21] (step=0004807) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:03:22] (step=0004808) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:24] (step=0004809) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:25] (step=0004810) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:27] (step=0004811) Train Loss mse: 0.0068, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:03:28] (step=0004812) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 04:03:30] (step=0004813) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:32] (step=0004814) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.52, +[2026-01-27 04:03:33] (step=0004815) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:35] (step=0004816) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:36] (step=0004817) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:38] (step=0004818) Train Loss mse: 0.0024, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:39] (step=0004819) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:41] (step=0004820) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:43] (step=0004821) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.49, +[2026-01-27 04:03:44] (step=0004822) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:03:46] (step=0004823) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:47] (step=0004824) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:49] (step=0004825) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:50] (step=0004826) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:52] (step=0004827) Train Loss mse: 0.0028, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:53] (step=0004828) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:55] (step=0004829) Train Loss mse: 0.0066, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:03:57] (step=0004830) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:03:58] (step=0004831) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:04:00] (step=0004832) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:04:01] (step=0004833) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:03] (step=0004834) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:04] (step=0004835) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:06] (step=0004836) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:07] (step=0004837) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 04:04:09] (step=0004838) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:11] (step=0004839) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:04:12] (step=0004840) Train Loss mse: 0.0065, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:04:14] (step=0004841) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:04:16] (step=0004842) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 04:04:17] (step=0004843) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:18] (step=0004844) Train Loss mse: 0.0074, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:20] (step=0004845) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:21] (step=0004846) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:23] (step=0004847) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:25] (step=0004848) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:04:26] (step=0004849) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:04:28] (step=0004850) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:30] (step=0004851) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:04:31] (step=0004852) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:33] (step=0004853) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:34] (step=0004854) Train Loss mse: 0.0076, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:35] (step=0004855) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 04:04:37] (step=0004856) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:04:39] (step=0004857) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:40] (step=0004858) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:04:42] (step=0004859) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:44] (step=0004860) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:04:45] (step=0004861) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:47] (step=0004862) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:48] (step=0004863) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:50] (step=0004864) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:51] (step=0004865) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:04:53] (step=0004866) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:04:54] (step=0004867) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:56] (step=0004868) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:04:58] (step=0004869) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:04:59] (step=0004870) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:01] (step=0004871) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:02] (step=0004872) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:04] (step=0004873) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:05:05] (step=0004874) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:07] (step=0004875) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:05:09] (step=0004876) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:10] (step=0004877) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:05:12] (step=0004878) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:13] (step=0004879) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:15] (step=0004880) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:05:16] (step=0004881) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:18] (step=0004882) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:20] (step=0004883) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:05:21] (step=0004884) Train Loss mse: 0.0078, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:23] (step=0004885) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:24] (step=0004886) Train Loss mse: 0.0067, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:05:26] (step=0004887) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.66, +[2026-01-27 04:05:28] (step=0004888) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:05:29] (step=0004889) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:31] (step=0004890) Train Loss mse: 0.0072, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:05:32] (step=0004891) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:34] (step=0004892) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:35] (step=0004893) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 04:05:37] (step=0004894) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:05:39] (step=0004895) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:05:40] (step=0004896) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:42] (step=0004897) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:43] (step=0004898) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:05:45] (step=0004899) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:46] (step=0004900) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 04:05:48] (step=0004901) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:05:49] (step=0004902) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:51] (step=0004903) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:05:53] (step=0004904) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:54] (step=0004905) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:05:56] (step=0004906) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:57] (step=0004907) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:05:59] (step=0004908) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 04:06:01] (step=0004909) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:06:02] (step=0004910) Train Loss mse: 0.0063, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:06:04] (step=0004911) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:05] (step=0004912) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:07] (step=0004913) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins_step5000 +Preparing Dataset vlm_gym_match_equation_sos_mse_loss_only_evalonce/vlm_gym_match_equation_sos_val +[eval debug] first 3 batch fingerprints: + fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] + fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_mse_loss_only_evalonce'}] +ce_avg: 0.0, mse_avg: 0.009475299157202244 +[2026-01-27 04:06:08] (step=0004914) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:10] (step=0004915) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:12] (step=0004916) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:06:13] (step=0004917) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:15] (step=0004918) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:16] (step=0004919) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:18] (step=0004920) Train Loss mse: 0.0030, Train Loss ce: 0.0000, Train Steps/Sec: 0.48, +[2026-01-27 04:06:20] (step=0004921) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.67, +[2026-01-27 04:06:21] (step=0004922) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:23] (step=0004923) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:24] (step=0004924) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:06:26] (step=0004925) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:27] (step=0004926) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:29] (step=0004927) Train Loss mse: 0.0055, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:30] (step=0004928) Train Loss mse: 0.0109, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:32] (step=0004929) Train Loss mse: 0.0035, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:06:34] (step=0004930) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:06:35] (step=0004931) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:37] (step=0004932) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:38] (step=0004933) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:40] (step=0004934) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:06:41] (step=0004935) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:43] (step=0004936) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:44] (step=0004937) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:46] (step=0004938) Train Loss mse: 0.0049, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:06:48] (step=0004939) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:06:49] (step=0004940) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:51] (step=0004941) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:52] (step=0004942) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:54] (step=0004943) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:06:55] (step=0004944) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:57] (step=0004945) Train Loss mse: 0.0040, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:06:58] (step=0004946) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 04:07:00] (step=0004947) Train Loss mse: 0.0032, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:07:02] (step=0004948) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:07:03] (step=0004949) Train Loss mse: 0.0041, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:05] (step=0004950) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:06] (step=0004951) Train Loss mse: 0.0053, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:08] (step=0004952) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:10] (step=0004953) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:07:11] (step=0004954) Train Loss mse: 0.0042, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:13] (step=0004955) Train Loss mse: 0.0060, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:07:14] (step=0004956) Train Loss mse: 0.0062, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:16] (step=0004957) Train Loss mse: 0.0069, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:07:17] (step=0004958) Train Loss mse: 0.0051, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:19] (step=0004959) Train Loss mse: 0.0061, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:20] (step=0004960) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:22] (step=0004961) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:24] (step=0004962) Train Loss mse: 0.0037, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:07:25] (step=0004963) Train Loss mse: 0.0057, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:27] (step=0004964) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:07:29] (step=0004965) Train Loss mse: 0.0047, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:07:30] (step=0004966) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:31] (step=0004967) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:33] (step=0004968) Train Loss mse: 0.0058, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 04:07:34] (step=0004969) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:36] (step=0004970) Train Loss mse: 0.0048, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:38] (step=0004971) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:07:39] (step=0004972) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.55, +[2026-01-27 04:07:41] (step=0004973) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:43] (step=0004974) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:07:44] (step=0004975) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:46] (step=0004976) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:47] (step=0004977) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:48] (step=0004978) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:50] (step=0004979) Train Loss mse: 0.0031, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:07:52] (step=0004980) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:53] (step=0004981) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:55] (step=0004982) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:07:56] (step=0004983) Train Loss mse: 0.0039, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:58] (step=0004984) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:07:59] (step=0004985) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:08:01] (step=0004986) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:08:03] (step=0004987) Train Loss mse: 0.0059, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:08:04] (step=0004988) Train Loss mse: 0.0034, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:08:06] (step=0004989) Train Loss mse: 0.0044, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:08:07] (step=0004990) Train Loss mse: 0.0052, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:08:09] (step=0004991) Train Loss mse: 0.0036, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:08:10] (step=0004992) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:08:12] (step=0004993) Train Loss mse: 0.0050, Train Loss ce: 0.0000, Train Steps/Sec: 0.69, +[2026-01-27 04:08:14] (step=0004994) Train Loss mse: 0.0056, Train Loss ce: 0.0000, Train Steps/Sec: 0.56, +[2026-01-27 04:08:15] (step=0004995) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:08:17] (step=0004996) Train Loss mse: 0.0046, Train Loss ce: 0.0000, Train Steps/Sec: 0.58, +[2026-01-27 04:08:19] (step=0004997) Train Loss mse: 0.0045, Train Loss ce: 0.0000, Train Steps/Sec: 0.59, +[2026-01-27 04:08:20] (step=0004998) Train Loss mse: 0.0033, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:08:22] (step=0004999) Train Loss mse: 0.0043, Train Loss ce: 0.0000, Train Steps/Sec: 0.68, +[2026-01-27 04:08:28] (step=0005000) Train Loss mse: 0.0038, Train Loss ce: 0.0000, Train Steps/Sec: 0.15, +[2026-01-27 04:08:28] Saving checkpoint to /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/0005000. +/opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:690: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html . + warnings.warn( +[2026-01-27 04:11:00] Done! \ No newline at end of file diff --git a/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/wandb-summary.json b/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/wandb-summary.json index ceb3b0d5dbe4438a07e047f13716f6dbf2a3426f..cd5ea895b6810b292318d909bb3c7218da3594a0 100644 --- a/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/wandb-summary.json +++ b/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/files/wandb-summary.json @@ -1 +1 @@ -{"_runtime": 4767.120189251, "mem_cache": 57446, "_step": 2743, "eval/ce": 0, "eval/mse": 0.01046404242515564, "ce": 0, "lr": 9.425234330683333e-06, "total_norm": 0.1573343575000763, "mem_allocated": 44017.296875, "_timestamp": 1769483291.7665625, "mse": 0.003913791850209236, "total_mse_tokens": 4608, "total_ce_tokens": 0, "total_samples": 8} \ No newline at end of file +{"_runtime": 8534.608449767, "mem_cache": 57122, "_step": 5000, "eval/ce": 0, "eval/mse": 0.009475299157202244, "ce": 0, "lr": 1.0000222278316245e-07, "total_norm": 0.03415835648775101, "mem_allocated": 44017.296875, "_timestamp": 1769486908.7339709, "mse": 0.003806258086115122, "total_mse_tokens": 4608, "total_ce_tokens": 0, "total_samples": 8} \ No newline at end of file diff --git a/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/run-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0.wandb.synced b/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260127_014845-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0/run-vlm_gym_match_equation_sos_one_img_lr2e_5_mse_only_ins-run0.wandb.synced new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391