Upload checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse
Browse files- checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20251227_170556-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse-run0/files/output.log +159 -159
- checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20251230_022852-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993-run0/files/output.log +159 -238
- checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20251230_024203-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed-run0/files/output.log +158 -158
- checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20260105_043345-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed-run0/files/output.log +0 -831
- checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20260107_185004-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed-run0/files/output.log +63 -63
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20251227_170556-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse-run0/files/output.log
CHANGED
|
@@ -572,165 +572,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 572 |
[[34m2025-12-27 18:26:02[39m] (step=0000561) Train Loss mse: 0.0000, Train Loss ce: 0.0664, Train Steps/Sec: 0.13,
|
| 573 |
[[34m2025-12-27 18:26:10[39m] (step=0000562) Train Loss mse: 0.0000, Train Loss ce: 0.0635, Train Steps/Sec: 0.13,
|
| 574 |
[[34m2025-12-27 18:26:18[39m] (step=0000563) Train Loss mse: 0.0000, Train Loss ce: 0.0685, Train Steps/Sec: 0.13,
|
| 575 |
-
FullyShardedDataParallel(
|
| 576 |
-
(_fsdp_wrapped_module): Bagel(
|
| 577 |
-
(language_model): Qwen2ForCausalLM(
|
| 578 |
-
(model): Qwen2Model(
|
| 579 |
-
(embed_tokens): Embedding(152064, 3584)
|
| 580 |
-
(layers): ModuleList(
|
| 581 |
-
(0-27): 28 x FullyShardedDataParallel(
|
| 582 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 583 |
-
(_checkpoint_wrapped_module): Qwen2MoTDecoderLayer(
|
| 584 |
-
(self_attn): PackedAttentionMoT(
|
| 585 |
-
(q_proj): Linear(in_features=3584, out_features=3584, bias=True)
|
| 586 |
-
(k_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 587 |
-
(v_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 588 |
-
(o_proj): Linear(in_features=3584, out_features=3584, bias=False)
|
| 589 |
-
(q_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 590 |
-
(k_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 591 |
-
(q_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 592 |
-
(k_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 593 |
-
(q_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=True)
|
| 594 |
-
(k_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 595 |
-
(v_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 596 |
-
(o_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=False)
|
| 597 |
-
)
|
| 598 |
-
(mlp): Qwen2MLP(
|
| 599 |
-
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 600 |
-
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 601 |
-
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 602 |
-
(act_fn): SiLU()
|
| 603 |
-
)
|
| 604 |
-
(mlp_moe_gen): Qwen2MLP(
|
| 605 |
-
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 606 |
-
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 607 |
-
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 608 |
-
(act_fn): SiLU()
|
| 609 |
-
)
|
| 610 |
-
(input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 611 |
-
(input_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 612 |
-
(post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 613 |
-
(post_attention_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 614 |
-
)
|
| 615 |
-
)
|
| 616 |
-
)
|
| 617 |
-
)
|
| 618 |
-
(norm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 619 |
-
(norm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 620 |
-
(rotary_emb): Qwen2RotaryEmbedding()
|
| 621 |
-
)
|
| 622 |
-
(lm_head): Linear(in_features=3584, out_features=152064, bias=False)
|
| 623 |
-
)
|
| 624 |
-
(vit_model): SiglipVisionModel(
|
| 625 |
-
(vision_model): FullyShardedDataParallel(
|
| 626 |
-
(_fsdp_wrapped_module): SiglipVisionTransformer(
|
| 627 |
-
(embeddings): SiglipVisionEmbeddings(
|
| 628 |
-
(position_embedding): Embedding(4900, 1152)
|
| 629 |
-
(patch_embedding): Linear(in_features=588, out_features=1152, bias=True)
|
| 630 |
-
)
|
| 631 |
-
(encoder): SiglipEncoder(
|
| 632 |
-
(layers): ModuleList(
|
| 633 |
-
(0-25): 26 x FullyShardedDataParallel(
|
| 634 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 635 |
-
(_checkpoint_wrapped_module): SiglipEncoderLayer(
|
| 636 |
-
(self_attn): SiglipFlashAttention2(
|
| 637 |
-
(k_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 638 |
-
(v_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 639 |
-
(q_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 640 |
-
(out_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 641 |
-
)
|
| 642 |
-
(layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 643 |
-
(mlp): SiglipMLP(
|
| 644 |
-
(activation_fn): PytorchGELUTanh()
|
| 645 |
-
(fc1): Linear(in_features=1152, out_features=4304, bias=True)
|
| 646 |
-
(fc2): Linear(in_features=4304, out_features=1152, bias=True)
|
| 647 |
-
)
|
| 648 |
-
(layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 649 |
-
)
|
| 650 |
-
)
|
| 651 |
-
)
|
| 652 |
-
)
|
| 653 |
-
)
|
| 654 |
-
(post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 655 |
-
)
|
| 656 |
-
)
|
| 657 |
-
)
|
| 658 |
-
(connector): FullyShardedDataParallel(
|
| 659 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 660 |
-
(_checkpoint_wrapped_module): MLPconnector(
|
| 661 |
-
(activation_fn): PytorchGELUTanh()
|
| 662 |
-
(fc1): Linear(in_features=1152, out_features=3584, bias=True)
|
| 663 |
-
(fc2): Linear(in_features=3584, out_features=3584, bias=True)
|
| 664 |
-
)
|
| 665 |
-
)
|
| 666 |
-
)
|
| 667 |
-
(vit_pos_embed): FullyShardedDataParallel(
|
| 668 |
-
(_fsdp_wrapped_module): PositionEmbedding()
|
| 669 |
-
)
|
| 670 |
-
)
|
| 671 |
-
)
|
| 672 |
-
_flat_param True
|
| 673 |
-
language_model.model.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 674 |
-
language_model.model.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 675 |
-
language_model.model.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 676 |
-
language_model.model.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 677 |
-
language_model.model.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 678 |
-
language_model.model.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 679 |
-
language_model.model.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 680 |
-
language_model.model.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 681 |
-
language_model.model.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 682 |
-
language_model.model.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 683 |
-
language_model.model.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 684 |
-
language_model.model.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 685 |
-
language_model.model.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 686 |
-
language_model.model.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 687 |
-
language_model.model.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 688 |
-
language_model.model.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 689 |
-
language_model.model.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 690 |
-
language_model.model.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 691 |
-
language_model.model.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 692 |
-
language_model.model.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 693 |
-
language_model.model.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 694 |
-
language_model.model.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 695 |
-
language_model.model.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 696 |
-
language_model.model.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 697 |
-
language_model.model.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 698 |
-
language_model.model.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 699 |
-
language_model.model.layers.26._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 700 |
-
language_model.model.layers.27._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 701 |
-
vit_model.vision_model._fsdp_wrapped_module._flat_param True
|
| 702 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 703 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 704 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 705 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 706 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 707 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 708 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 709 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 710 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 711 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 712 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 713 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 714 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 715 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 716 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 717 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 718 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 719 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 720 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 721 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 722 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 723 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 724 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 725 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 726 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 727 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 728 |
-
connector._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 729 |
-
vit_pos_embed._fsdp_wrapped_module._flat_param False
|
| 730 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_train
|
| 731 |
-
Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10.json: [Errno 2] No such file or directory: '/home/jiaxin/bagel_train/hashes_test_set_v10.json'
|
| 732 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_val
|
| 733 |
-
Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10.json: [Errno 2] No such file or directory: '/home/jiaxin/bagel_train/hashes_test_set_v10.json'
|
| 734 |
[[34m2025-12-27 18:26:26[39m] (step=0000564) Train Loss mse: 0.0000, Train Loss ce: 0.0656, Train Steps/Sec: 0.13,
|
| 735 |
[[34m2025-12-27 18:26:34[39m] (step=0000565) Train Loss mse: 0.0000, Train Loss ce: 0.0642, Train Steps/Sec: 0.13,
|
| 736 |
[[34m2025-12-27 18:26:41[39m] (step=0000566) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.13,
|
|
@@ -1083,6 +924,165 @@ Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10
|
|
| 1083 |
[[34m2025-12-27 19:12:07[39m] (step=0000913) Train Loss mse: 0.0000, Train Loss ce: 0.0636, Train Steps/Sec: 0.13,
|
| 1084 |
[[34m2025-12-27 19:12:14[39m] (step=0000914) Train Loss mse: 0.0000, Train Loss ce: 0.0631, Train Steps/Sec: 0.13,
|
| 1085 |
[[34m2025-12-27 19:12:23[39m] (step=0000915) Train Loss mse: 0.0000, Train Loss ce: 0.0649, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1086 |
[[34m2025-12-27 19:12:30[39m] (step=0000916) Train Loss mse: 0.0000, Train Loss ce: 0.0641, Train Steps/Sec: 0.13,
|
| 1087 |
[[34m2025-12-27 19:12:38[39m] (step=0000917) Train Loss mse: 0.0000, Train Loss ce: 0.0652, Train Steps/Sec: 0.13,
|
| 1088 |
[[34m2025-12-27 19:12:46[39m] (step=0000918) Train Loss mse: 0.0000, Train Loss ce: 0.0645, Train Steps/Sec: 0.13,
|
|
|
|
| 572 |
[[34m2025-12-27 18:26:02[39m] (step=0000561) Train Loss mse: 0.0000, Train Loss ce: 0.0664, Train Steps/Sec: 0.13,
|
| 573 |
[[34m2025-12-27 18:26:10[39m] (step=0000562) Train Loss mse: 0.0000, Train Loss ce: 0.0635, Train Steps/Sec: 0.13,
|
| 574 |
[[34m2025-12-27 18:26:18[39m] (step=0000563) Train Loss mse: 0.0000, Train Loss ce: 0.0685, Train Steps/Sec: 0.13,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 575 |
[[34m2025-12-27 18:26:26[39m] (step=0000564) Train Loss mse: 0.0000, Train Loss ce: 0.0656, Train Steps/Sec: 0.13,
|
| 576 |
[[34m2025-12-27 18:26:34[39m] (step=0000565) Train Loss mse: 0.0000, Train Loss ce: 0.0642, Train Steps/Sec: 0.13,
|
| 577 |
[[34m2025-12-27 18:26:41[39m] (step=0000566) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.13,
|
|
|
|
| 924 |
[[34m2025-12-27 19:12:07[39m] (step=0000913) Train Loss mse: 0.0000, Train Loss ce: 0.0636, Train Steps/Sec: 0.13,
|
| 925 |
[[34m2025-12-27 19:12:14[39m] (step=0000914) Train Loss mse: 0.0000, Train Loss ce: 0.0631, Train Steps/Sec: 0.13,
|
| 926 |
[[34m2025-12-27 19:12:23[39m] (step=0000915) Train Loss mse: 0.0000, Train Loss ce: 0.0649, Train Steps/Sec: 0.12,
|
| 927 |
+
FullyShardedDataParallel(
|
| 928 |
+
(_fsdp_wrapped_module): Bagel(
|
| 929 |
+
(language_model): Qwen2ForCausalLM(
|
| 930 |
+
(model): Qwen2Model(
|
| 931 |
+
(embed_tokens): Embedding(152064, 3584)
|
| 932 |
+
(layers): ModuleList(
|
| 933 |
+
(0-27): 28 x FullyShardedDataParallel(
|
| 934 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 935 |
+
(_checkpoint_wrapped_module): Qwen2MoTDecoderLayer(
|
| 936 |
+
(self_attn): PackedAttentionMoT(
|
| 937 |
+
(q_proj): Linear(in_features=3584, out_features=3584, bias=True)
|
| 938 |
+
(k_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 939 |
+
(v_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 940 |
+
(o_proj): Linear(in_features=3584, out_features=3584, bias=False)
|
| 941 |
+
(q_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 942 |
+
(k_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 943 |
+
(q_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 944 |
+
(k_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 945 |
+
(q_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=True)
|
| 946 |
+
(k_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 947 |
+
(v_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 948 |
+
(o_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=False)
|
| 949 |
+
)
|
| 950 |
+
(mlp): Qwen2MLP(
|
| 951 |
+
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 952 |
+
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 953 |
+
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 954 |
+
(act_fn): SiLU()
|
| 955 |
+
)
|
| 956 |
+
(mlp_moe_gen): Qwen2MLP(
|
| 957 |
+
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 958 |
+
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 959 |
+
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 960 |
+
(act_fn): SiLU()
|
| 961 |
+
)
|
| 962 |
+
(input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 963 |
+
(input_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 964 |
+
(post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 965 |
+
(post_attention_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 966 |
+
)
|
| 967 |
+
)
|
| 968 |
+
)
|
| 969 |
+
)
|
| 970 |
+
(norm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 971 |
+
(norm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 972 |
+
(rotary_emb): Qwen2RotaryEmbedding()
|
| 973 |
+
)
|
| 974 |
+
(lm_head): Linear(in_features=3584, out_features=152064, bias=False)
|
| 975 |
+
)
|
| 976 |
+
(vit_model): SiglipVisionModel(
|
| 977 |
+
(vision_model): FullyShardedDataParallel(
|
| 978 |
+
(_fsdp_wrapped_module): SiglipVisionTransformer(
|
| 979 |
+
(embeddings): SiglipVisionEmbeddings(
|
| 980 |
+
(position_embedding): Embedding(4900, 1152)
|
| 981 |
+
(patch_embedding): Linear(in_features=588, out_features=1152, bias=True)
|
| 982 |
+
)
|
| 983 |
+
(encoder): SiglipEncoder(
|
| 984 |
+
(layers): ModuleList(
|
| 985 |
+
(0-25): 26 x FullyShardedDataParallel(
|
| 986 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 987 |
+
(_checkpoint_wrapped_module): SiglipEncoderLayer(
|
| 988 |
+
(self_attn): SiglipFlashAttention2(
|
| 989 |
+
(k_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 990 |
+
(v_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 991 |
+
(q_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 992 |
+
(out_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 993 |
+
)
|
| 994 |
+
(layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 995 |
+
(mlp): SiglipMLP(
|
| 996 |
+
(activation_fn): PytorchGELUTanh()
|
| 997 |
+
(fc1): Linear(in_features=1152, out_features=4304, bias=True)
|
| 998 |
+
(fc2): Linear(in_features=4304, out_features=1152, bias=True)
|
| 999 |
+
)
|
| 1000 |
+
(layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 1001 |
+
)
|
| 1002 |
+
)
|
| 1003 |
+
)
|
| 1004 |
+
)
|
| 1005 |
+
)
|
| 1006 |
+
(post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 1007 |
+
)
|
| 1008 |
+
)
|
| 1009 |
+
)
|
| 1010 |
+
(connector): FullyShardedDataParallel(
|
| 1011 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 1012 |
+
(_checkpoint_wrapped_module): MLPconnector(
|
| 1013 |
+
(activation_fn): PytorchGELUTanh()
|
| 1014 |
+
(fc1): Linear(in_features=1152, out_features=3584, bias=True)
|
| 1015 |
+
(fc2): Linear(in_features=3584, out_features=3584, bias=True)
|
| 1016 |
+
)
|
| 1017 |
+
)
|
| 1018 |
+
)
|
| 1019 |
+
(vit_pos_embed): FullyShardedDataParallel(
|
| 1020 |
+
(_fsdp_wrapped_module): PositionEmbedding()
|
| 1021 |
+
)
|
| 1022 |
+
)
|
| 1023 |
+
)
|
| 1024 |
+
_flat_param True
|
| 1025 |
+
language_model.model.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1026 |
+
language_model.model.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1027 |
+
language_model.model.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1028 |
+
language_model.model.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1029 |
+
language_model.model.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1030 |
+
language_model.model.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1031 |
+
language_model.model.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1032 |
+
language_model.model.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1033 |
+
language_model.model.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1034 |
+
language_model.model.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1035 |
+
language_model.model.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1036 |
+
language_model.model.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1037 |
+
language_model.model.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1038 |
+
language_model.model.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1039 |
+
language_model.model.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1040 |
+
language_model.model.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1041 |
+
language_model.model.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1042 |
+
language_model.model.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1043 |
+
language_model.model.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1044 |
+
language_model.model.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1045 |
+
language_model.model.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1046 |
+
language_model.model.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1047 |
+
language_model.model.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1048 |
+
language_model.model.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1049 |
+
language_model.model.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1050 |
+
language_model.model.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1051 |
+
language_model.model.layers.26._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1052 |
+
language_model.model.layers.27._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1053 |
+
vit_model.vision_model._fsdp_wrapped_module._flat_param True
|
| 1054 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1055 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1056 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1057 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1058 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1059 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1060 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1061 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1062 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1063 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1064 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1065 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1066 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1067 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1068 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1069 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1070 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1071 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1072 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1073 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1074 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1075 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1076 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1077 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1078 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1079 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1080 |
+
connector._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1081 |
+
vit_pos_embed._fsdp_wrapped_module._flat_param False
|
| 1082 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_train
|
| 1083 |
+
Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10.json: [Errno 2] No such file or directory: '/home/jiaxin/bagel_train/hashes_test_set_v10.json'
|
| 1084 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_val
|
| 1085 |
+
Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10.json: [Errno 2] No such file or directory: '/home/jiaxin/bagel_train/hashes_test_set_v10.json'
|
| 1086 |
[[34m2025-12-27 19:12:30[39m] (step=0000916) Train Loss mse: 0.0000, Train Loss ce: 0.0641, Train Steps/Sec: 0.13,
|
| 1087 |
[[34m2025-12-27 19:12:38[39m] (step=0000917) Train Loss mse: 0.0000, Train Loss ce: 0.0652, Train Steps/Sec: 0.13,
|
| 1088 |
[[34m2025-12-27 19:12:46[39m] (step=0000918) Train Loss mse: 0.0000, Train Loss ce: 0.0645, Train Steps/Sec: 0.13,
|
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20251230_022852-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993-run0/files/output.log
CHANGED
|
@@ -1,3 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 2 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 3 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
@@ -979,165 +1138,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 979 |
[[34m2025-12-30 04:42:35[39m] (step=0000968) Train Loss mse: 0.0000, Train Loss ce: 0.0645, Train Steps/Sec: 0.13,
|
| 980 |
[[34m2025-12-30 04:42:43[39m] (step=0000969) Train Loss mse: 0.0000, Train Loss ce: 0.0641, Train Steps/Sec: 0.13,
|
| 981 |
[[34m2025-12-30 04:42:51[39m] (step=0000970) Train Loss mse: 0.0000, Train Loss ce: 0.0653, Train Steps/Sec: 0.13,
|
| 982 |
-
FullyShardedDataParallel(
|
| 983 |
-
(_fsdp_wrapped_module): Bagel(
|
| 984 |
-
(language_model): Qwen2ForCausalLM(
|
| 985 |
-
(model): Qwen2Model(
|
| 986 |
-
(embed_tokens): Embedding(152064, 3584)
|
| 987 |
-
(layers): ModuleList(
|
| 988 |
-
(0-27): 28 x FullyShardedDataParallel(
|
| 989 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 990 |
-
(_checkpoint_wrapped_module): Qwen2MoTDecoderLayer(
|
| 991 |
-
(self_attn): PackedAttentionMoT(
|
| 992 |
-
(q_proj): Linear(in_features=3584, out_features=3584, bias=True)
|
| 993 |
-
(k_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 994 |
-
(v_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 995 |
-
(o_proj): Linear(in_features=3584, out_features=3584, bias=False)
|
| 996 |
-
(q_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 997 |
-
(k_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 998 |
-
(q_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 999 |
-
(k_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 1000 |
-
(q_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=True)
|
| 1001 |
-
(k_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 1002 |
-
(v_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 1003 |
-
(o_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=False)
|
| 1004 |
-
)
|
| 1005 |
-
(mlp): Qwen2MLP(
|
| 1006 |
-
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 1007 |
-
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 1008 |
-
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 1009 |
-
(act_fn): SiLU()
|
| 1010 |
-
)
|
| 1011 |
-
(mlp_moe_gen): Qwen2MLP(
|
| 1012 |
-
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 1013 |
-
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 1014 |
-
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 1015 |
-
(act_fn): SiLU()
|
| 1016 |
-
)
|
| 1017 |
-
(input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 1018 |
-
(input_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 1019 |
-
(post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 1020 |
-
(post_attention_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 1021 |
-
)
|
| 1022 |
-
)
|
| 1023 |
-
)
|
| 1024 |
-
)
|
| 1025 |
-
(norm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 1026 |
-
(norm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 1027 |
-
(rotary_emb): Qwen2RotaryEmbedding()
|
| 1028 |
-
)
|
| 1029 |
-
(lm_head): Linear(in_features=3584, out_features=152064, bias=False)
|
| 1030 |
-
)
|
| 1031 |
-
(vit_model): SiglipVisionModel(
|
| 1032 |
-
(vision_model): FullyShardedDataParallel(
|
| 1033 |
-
(_fsdp_wrapped_module): SiglipVisionTransformer(
|
| 1034 |
-
(embeddings): SiglipVisionEmbeddings(
|
| 1035 |
-
(position_embedding): Embedding(4900, 1152)
|
| 1036 |
-
(patch_embedding): Linear(in_features=588, out_features=1152, bias=True)
|
| 1037 |
-
)
|
| 1038 |
-
(encoder): SiglipEncoder(
|
| 1039 |
-
(layers): ModuleList(
|
| 1040 |
-
(0-25): 26 x FullyShardedDataParallel(
|
| 1041 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 1042 |
-
(_checkpoint_wrapped_module): SiglipEncoderLayer(
|
| 1043 |
-
(self_attn): SiglipFlashAttention2(
|
| 1044 |
-
(k_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 1045 |
-
(v_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 1046 |
-
(q_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 1047 |
-
(out_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 1048 |
-
)
|
| 1049 |
-
(layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 1050 |
-
(mlp): SiglipMLP(
|
| 1051 |
-
(activation_fn): PytorchGELUTanh()
|
| 1052 |
-
(fc1): Linear(in_features=1152, out_features=4304, bias=True)
|
| 1053 |
-
(fc2): Linear(in_features=4304, out_features=1152, bias=True)
|
| 1054 |
-
)
|
| 1055 |
-
(layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 1056 |
-
)
|
| 1057 |
-
)
|
| 1058 |
-
)
|
| 1059 |
-
)
|
| 1060 |
-
)
|
| 1061 |
-
(post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 1062 |
-
)
|
| 1063 |
-
)
|
| 1064 |
-
)
|
| 1065 |
-
(connector): FullyShardedDataParallel(
|
| 1066 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 1067 |
-
(_checkpoint_wrapped_module): MLPconnector(
|
| 1068 |
-
(activation_fn): PytorchGELUTanh()
|
| 1069 |
-
(fc1): Linear(in_features=1152, out_features=3584, bias=True)
|
| 1070 |
-
(fc2): Linear(in_features=3584, out_features=3584, bias=True)
|
| 1071 |
-
)
|
| 1072 |
-
)
|
| 1073 |
-
)
|
| 1074 |
-
(vit_pos_embed): FullyShardedDataParallel(
|
| 1075 |
-
(_fsdp_wrapped_module): PositionEmbedding()
|
| 1076 |
-
)
|
| 1077 |
-
)
|
| 1078 |
-
)
|
| 1079 |
-
_flat_param True
|
| 1080 |
-
language_model.model.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1081 |
-
language_model.model.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1082 |
-
language_model.model.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1083 |
-
language_model.model.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1084 |
-
language_model.model.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1085 |
-
language_model.model.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1086 |
-
language_model.model.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1087 |
-
language_model.model.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1088 |
-
language_model.model.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1089 |
-
language_model.model.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1090 |
-
language_model.model.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1091 |
-
language_model.model.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1092 |
-
language_model.model.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1093 |
-
language_model.model.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1094 |
-
language_model.model.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1095 |
-
language_model.model.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1096 |
-
language_model.model.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1097 |
-
language_model.model.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1098 |
-
language_model.model.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1099 |
-
language_model.model.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1100 |
-
language_model.model.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1101 |
-
language_model.model.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1102 |
-
language_model.model.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1103 |
-
language_model.model.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1104 |
-
language_model.model.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1105 |
-
language_model.model.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1106 |
-
language_model.model.layers.26._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1107 |
-
language_model.model.layers.27._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1108 |
-
vit_model.vision_model._fsdp_wrapped_module._flat_param True
|
| 1109 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1110 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1111 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1112 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1113 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1114 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1115 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1116 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1117 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1118 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1119 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1120 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1121 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1122 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1123 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1124 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1125 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1126 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1127 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1128 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1129 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1130 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1131 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1132 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1133 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1134 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1135 |
-
connector._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 1136 |
-
vit_pos_embed._fsdp_wrapped_module._flat_param False
|
| 1137 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_train
|
| 1138 |
-
Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10.json: [Errno 2] No such file or directory: '/home/jiaxin/bagel_train/hashes_test_set_v10.json'
|
| 1139 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_val
|
| 1140 |
-
Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10.json: [Errno 2] No such file or directory: '/home/jiaxin/bagel_train/hashes_test_set_v10.json'
|
| 1141 |
[[34m2025-12-30 04:42:58[39m] (step=0000971) Train Loss mse: 0.0000, Train Loss ce: 0.0651, Train Steps/Sec: 0.13,
|
| 1142 |
[[34m2025-12-30 04:43:06[39m] (step=0000972) Train Loss mse: 0.0000, Train Loss ce: 0.0663, Train Steps/Sec: 0.12,
|
| 1143 |
[[34m2025-12-30 04:43:14[39m] (step=0000973) Train Loss mse: 0.0000, Train Loss ce: 0.0629, Train Steps/Sec: 0.13,
|
|
@@ -3368,85 +3368,6 @@ Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10
|
|
| 3368 |
[[34m2025-12-30 09:37:59[39m] (step=0003195) Train Loss mse: 0.0000, Train Loss ce: 0.0602, Train Steps/Sec: 0.12,
|
| 3369 |
[[34m2025-12-30 09:38:06[39m] (step=0003196) Train Loss mse: 0.0000, Train Loss ce: 0.0614, Train Steps/Sec: 0.13,
|
| 3370 |
[[34m2025-12-30 09:38:14[39m] (step=0003197) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
| 3371 |
-
[[34m2025-12-30 09:38:22[39m] (step=0003198) Train Loss mse: 0.0000, Train Loss ce: 0.0613, Train Steps/Sec: 0.13,
|
| 3372 |
-
[[34m2025-12-30 09:38:30[39m] (step=0003199) Train Loss mse: 0.0000, Train Loss ce: 0.0623, Train Steps/Sec: 0.13,
|
| 3373 |
-
[[34m2025-12-30 09:38:38[39m] (step=0003200) Train Loss mse: 0.0000, Train Loss ce: 0.0629, Train Steps/Sec: 0.12,
|
| 3374 |
-
[[34m2025-12-30 09:38:46[39m] (step=0003201) Train Loss mse: 0.0000, Train Loss ce: 0.0629, Train Steps/Sec: 0.13,
|
| 3375 |
-
[[34m2025-12-30 09:38:54[39m] (step=0003202) Train Loss mse: 0.0000, Train Loss ce: 0.0619, Train Steps/Sec: 0.13,
|
| 3376 |
-
[[34m2025-12-30 09:39:01[39m] (step=0003203) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
| 3377 |
-
[[34m2025-12-30 09:39:10[39m] (step=0003204) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.12,
|
| 3378 |
-
[[34m2025-12-30 09:39:18[39m] (step=0003205) Train Loss mse: 0.0000, Train Loss ce: 0.0624, Train Steps/Sec: 0.12,
|
| 3379 |
-
[[34m2025-12-30 09:39:26[39m] (step=0003206) Train Loss mse: 0.0000, Train Loss ce: 0.0594, Train Steps/Sec: 0.12,
|
| 3380 |
-
[[34m2025-12-30 09:39:34[39m] (step=0003207) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
| 3381 |
-
[[34m2025-12-30 09:39:41[39m] (step=0003208) Train Loss mse: 0.0000, Train Loss ce: 0.0626, Train Steps/Sec: 0.13,
|
| 3382 |
-
[[34m2025-12-30 09:39:50[39m] (step=0003209) Train Loss mse: 0.0000, Train Loss ce: 0.0605, Train Steps/Sec: 0.12,
|
| 3383 |
-
[[34m2025-12-30 09:39:57[39m] (step=0003210) Train Loss mse: 0.0000, Train Loss ce: 0.0614, Train Steps/Sec: 0.13,
|
| 3384 |
-
[[34m2025-12-30 09:40:05[39m] (step=0003211) Train Loss mse: 0.0000, Train Loss ce: 0.0634, Train Steps/Sec: 0.13,
|
| 3385 |
-
[[34m2025-12-30 09:40:13[39m] (step=0003212) Train Loss mse: 0.0000, Train Loss ce: 0.0594, Train Steps/Sec: 0.13,
|
| 3386 |
-
[[34m2025-12-30 09:40:21[39m] (step=0003213) Train Loss mse: 0.0000, Train Loss ce: 0.0626, Train Steps/Sec: 0.12,
|
| 3387 |
-
[[34m2025-12-30 09:40:29[39m] (step=0003214) Train Loss mse: 0.0000, Train Loss ce: 0.0615, Train Steps/Sec: 0.12,
|
| 3388 |
-
[[34m2025-12-30 09:40:37[39m] (step=0003215) Train Loss mse: 0.0000, Train Loss ce: 0.0627, Train Steps/Sec: 0.13,
|
| 3389 |
-
[[34m2025-12-30 09:40:45[39m] (step=0003216) Train Loss mse: 0.0000, Train Loss ce: 0.0613, Train Steps/Sec: 0.13,
|
| 3390 |
-
[[34m2025-12-30 09:40:53[39m] (step=0003217) Train Loss mse: 0.0000, Train Loss ce: 0.0627, Train Steps/Sec: 0.13,
|
| 3391 |
-
[[34m2025-12-30 09:41:01[39m] (step=0003218) Train Loss mse: 0.0000, Train Loss ce: 0.0615, Train Steps/Sec: 0.12,
|
| 3392 |
-
[[34m2025-12-30 09:41:08[39m] (step=0003219) Train Loss mse: 0.0000, Train Loss ce: 0.0612, Train Steps/Sec: 0.13,
|
| 3393 |
-
[[34m2025-12-30 09:41:16[39m] (step=0003220) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
| 3394 |
-
[[34m2025-12-30 09:41:24[39m] (step=0003221) Train Loss mse: 0.0000, Train Loss ce: 0.0610, Train Steps/Sec: 0.13,
|
| 3395 |
-
[[34m2025-12-30 09:41:32[39m] (step=0003222) Train Loss mse: 0.0000, Train Loss ce: 0.0621, Train Steps/Sec: 0.12,
|
| 3396 |
-
[[34m2025-12-30 09:41:40[39m] (step=0003223) Train Loss mse: 0.0000, Train Loss ce: 0.0612, Train Steps/Sec: 0.13,
|
| 3397 |
-
[[34m2025-12-30 09:41:48[39m] (step=0003224) Train Loss mse: 0.0000, Train Loss ce: 0.0619, Train Steps/Sec: 0.13,
|
| 3398 |
-
[[34m2025-12-30 09:41:55[39m] (step=0003225) Train Loss mse: 0.0000, Train Loss ce: 0.0628, Train Steps/Sec: 0.13,
|
| 3399 |
-
[[34m2025-12-30 09:42:03[39m] (step=0003226) Train Loss mse: 0.0000, Train Loss ce: 0.0623, Train Steps/Sec: 0.13,
|
| 3400 |
-
[[34m2025-12-30 09:42:11[39m] (step=0003227) Train Loss mse: 0.0000, Train Loss ce: 0.0622, Train Steps/Sec: 0.12,
|
| 3401 |
-
[[34m2025-12-30 09:42:19[39m] (step=0003228) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
| 3402 |
-
[[34m2025-12-30 09:42:27[39m] (step=0003229) Train Loss mse: 0.0000, Train Loss ce: 0.0625, Train Steps/Sec: 0.13,
|
| 3403 |
-
[[34m2025-12-30 09:42:35[39m] (step=0003230) Train Loss mse: 0.0000, Train Loss ce: 0.0613, Train Steps/Sec: 0.13,
|
| 3404 |
-
[[34m2025-12-30 09:42:43[39m] (step=0003231) Train Loss mse: 0.0000, Train Loss ce: 0.0626, Train Steps/Sec: 0.13,
|
| 3405 |
-
[[34m2025-12-30 09:42:50[39m] (step=0003232) Train Loss mse: 0.0000, Train Loss ce: 0.0622, Train Steps/Sec: 0.13,
|
| 3406 |
-
[[34m2025-12-30 09:42:58[39m] (step=0003233) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.13,
|
| 3407 |
-
[[34m2025-12-30 09:43:06[39m] (step=0003234) Train Loss mse: 0.0000, Train Loss ce: 0.0612, Train Steps/Sec: 0.13,
|
| 3408 |
-
[[34m2025-12-30 09:43:14[39m] (step=0003235) Train Loss mse: 0.0000, Train Loss ce: 0.0609, Train Steps/Sec: 0.12,
|
| 3409 |
-
[[34m2025-12-30 09:43:22[39m] (step=0003236) Train Loss mse: 0.0000, Train Loss ce: 0.0607, Train Steps/Sec: 0.13,
|
| 3410 |
-
[[34m2025-12-30 09:43:30[39m] (step=0003237) Train Loss mse: 0.0000, Train Loss ce: 0.0610, Train Steps/Sec: 0.12,
|
| 3411 |
-
[[34m2025-12-30 09:43:38[39m] (step=0003238) Train Loss mse: 0.0000, Train Loss ce: 0.0629, Train Steps/Sec: 0.13,
|
| 3412 |
-
[[34m2025-12-30 09:43:46[39m] (step=0003239) Train Loss mse: 0.0000, Train Loss ce: 0.0622, Train Steps/Sec: 0.13,
|
| 3413 |
-
[[34m2025-12-30 09:43:54[39m] (step=0003240) Train Loss mse: 0.0000, Train Loss ce: 0.0606, Train Steps/Sec: 0.12,
|
| 3414 |
-
[[34m2025-12-30 09:44:02[39m] (step=0003241) Train Loss mse: 0.0000, Train Loss ce: 0.0603, Train Steps/Sec: 0.12,
|
| 3415 |
-
[[34m2025-12-30 09:44:10[39m] (step=0003242) Train Loss mse: 0.0000, Train Loss ce: 0.0612, Train Steps/Sec: 0.13,
|
| 3416 |
-
[[34m2025-12-30 09:44:17[39m] (step=0003243) Train Loss mse: 0.0000, Train Loss ce: 0.0627, Train Steps/Sec: 0.13,
|
| 3417 |
-
[[34m2025-12-30 09:44:25[39m] (step=0003244) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
| 3418 |
-
[[34m2025-12-30 09:44:33[39m] (step=0003245) Train Loss mse: 0.0000, Train Loss ce: 0.0617, Train Steps/Sec: 0.13,
|
| 3419 |
-
[[34m2025-12-30 09:44:41[39m] (step=0003246) Train Loss mse: 0.0000, Train Loss ce: 0.0613, Train Steps/Sec: 0.12,
|
| 3420 |
-
[[34m2025-12-30 09:44:49[39m] (step=0003247) Train Loss mse: 0.0000, Train Loss ce: 0.0607, Train Steps/Sec: 0.13,
|
| 3421 |
-
[[34m2025-12-30 09:44:57[39m] (step=0003248) Train Loss mse: 0.0000, Train Loss ce: 0.0602, Train Steps/Sec: 0.13,
|
| 3422 |
-
[[34m2025-12-30 09:45:05[39m] (step=0003249) Train Loss mse: 0.0000, Train Loss ce: 0.0614, Train Steps/Sec: 0.13,
|
| 3423 |
-
[[34m2025-12-30 09:45:13[39m] (step=0003250) Train Loss mse: 0.0000, Train Loss ce: 0.0606, Train Steps/Sec: 0.12,
|
| 3424 |
-
[[34m2025-12-30 09:45:21[39m] (step=0003251) Train Loss mse: 0.0000, Train Loss ce: 0.0617, Train Steps/Sec: 0.13,
|
| 3425 |
-
[[34m2025-12-30 09:45:29[39m] (step=0003252) Train Loss mse: 0.0000, Train Loss ce: 0.0617, Train Steps/Sec: 0.13,
|
| 3426 |
-
[[34m2025-12-30 09:45:36[39m] (step=0003253) Train Loss mse: 0.0000, Train Loss ce: 0.0615, Train Steps/Sec: 0.13,
|
| 3427 |
-
[[34m2025-12-30 09:45:44[39m] (step=0003254) Train Loss mse: 0.0000, Train Loss ce: 0.0600, Train Steps/Sec: 0.13,
|
| 3428 |
-
[[34m2025-12-30 09:45:52[39m] (step=0003255) Train Loss mse: 0.0000, Train Loss ce: 0.0606, Train Steps/Sec: 0.13,
|
| 3429 |
-
[[34m2025-12-30 09:46:00[39m] (step=0003256) Train Loss mse: 0.0000, Train Loss ce: 0.0624, Train Steps/Sec: 0.12,
|
| 3430 |
-
[[34m2025-12-30 09:46:08[39m] (step=0003257) Train Loss mse: 0.0000, Train Loss ce: 0.0624, Train Steps/Sec: 0.13,
|
| 3431 |
-
[[34m2025-12-30 09:46:16[39m] (step=0003258) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
| 3432 |
-
[[34m2025-12-30 09:46:24[39m] (step=0003259) Train Loss mse: 0.0000, Train Loss ce: 0.0618, Train Steps/Sec: 0.13,
|
| 3433 |
-
[[34m2025-12-30 09:46:32[39m] (step=0003260) Train Loss mse: 0.0000, Train Loss ce: 0.0614, Train Steps/Sec: 0.13,
|
| 3434 |
-
[[34m2025-12-30 09:46:40[39m] (step=0003261) Train Loss mse: 0.0000, Train Loss ce: 0.0621, Train Steps/Sec: 0.13,
|
| 3435 |
-
[[34m2025-12-30 09:46:47[39m] (step=0003262) Train Loss mse: 0.0000, Train Loss ce: 0.0609, Train Steps/Sec: 0.13,
|
| 3436 |
-
[[34m2025-12-30 09:46:55[39m] (step=0003263) Train Loss mse: 0.0000, Train Loss ce: 0.0600, Train Steps/Sec: 0.13,
|
| 3437 |
-
[[34m2025-12-30 09:47:03[39m] (step=0003264) Train Loss mse: 0.0000, Train Loss ce: 0.0619, Train Steps/Sec: 0.13,
|
| 3438 |
-
[[34m2025-12-30 09:47:11[39m] (step=0003265) Train Loss mse: 0.0000, Train Loss ce: 0.0624, Train Steps/Sec: 0.12,
|
| 3439 |
-
[[34m2025-12-30 09:47:19[39m] (step=0003266) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
| 3440 |
-
[[34m2025-12-30 09:47:27[39m] (step=0003267) Train Loss mse: 0.0000, Train Loss ce: 0.0631, Train Steps/Sec: 0.13,
|
| 3441 |
-
[[34m2025-12-30 09:47:34[39m] (step=0003268) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.13,
|
| 3442 |
-
[[34m2025-12-30 09:47:42[39m] (step=0003269) Train Loss mse: 0.0000, Train Loss ce: 0.0620, Train Steps/Sec: 0.13,
|
| 3443 |
-
[[34m2025-12-30 09:47:50[39m] (step=0003270) Train Loss mse: 0.0000, Train Loss ce: 0.0617, Train Steps/Sec: 0.13,
|
| 3444 |
-
[[34m2025-12-30 09:47:58[39m] (step=0003271) Train Loss mse: 0.0000, Train Loss ce: 0.0622, Train Steps/Sec: 0.13,
|
| 3445 |
-
[[34m2025-12-30 09:48:06[39m] (step=0003272) Train Loss mse: 0.0000, Train Loss ce: 0.0613, Train Steps/Sec: 0.12,
|
| 3446 |
-
[[34m2025-12-30 09:48:14[39m] (step=0003273) Train Loss mse: 0.0000, Train Loss ce: 0.0610, Train Steps/Sec: 0.13,
|
| 3447 |
-
[[34m2025-12-30 09:48:22[39m] (step=0003274) Train Loss mse: 0.0000, Train Loss ce: 0.0608, Train Steps/Sec: 0.13,
|
| 3448 |
-
[[34m2025-12-30 09:48:29[39m] (step=0003275) Train Loss mse: 0.0000, Train Loss ce: 0.0609, Train Steps/Sec: 0.13,
|
| 3449 |
-
[[34m2025-12-30 09:48:37[39m] (step=0003276) Train Loss mse: 0.0000, Train Loss ce: 0.0613, Train Steps/Sec: 0.13,
|
| 3450 |
[[34m2025-12-30 09:48:45[39m] (step=0003277) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.13,
|
| 3451 |
[[34m2025-12-30 09:48:53[39m] (step=0003278) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.12,
|
| 3452 |
[[34m2025-12-30 09:49:01[39m] (step=0003279) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.13,
|
|
|
|
| 1 |
+
FullyShardedDataParallel(
|
| 2 |
+
(_fsdp_wrapped_module): Bagel(
|
| 3 |
+
(language_model): Qwen2ForCausalLM(
|
| 4 |
+
(model): Qwen2Model(
|
| 5 |
+
(embed_tokens): Embedding(152064, 3584)
|
| 6 |
+
(layers): ModuleList(
|
| 7 |
+
(0-27): 28 x FullyShardedDataParallel(
|
| 8 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 9 |
+
(_checkpoint_wrapped_module): Qwen2MoTDecoderLayer(
|
| 10 |
+
(self_attn): PackedAttentionMoT(
|
| 11 |
+
(q_proj): Linear(in_features=3584, out_features=3584, bias=True)
|
| 12 |
+
(k_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 13 |
+
(v_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 14 |
+
(o_proj): Linear(in_features=3584, out_features=3584, bias=False)
|
| 15 |
+
(q_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 16 |
+
(k_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 17 |
+
(q_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 18 |
+
(k_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 19 |
+
(q_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=True)
|
| 20 |
+
(k_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 21 |
+
(v_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 22 |
+
(o_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=False)
|
| 23 |
+
)
|
| 24 |
+
(mlp): Qwen2MLP(
|
| 25 |
+
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 26 |
+
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 27 |
+
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 28 |
+
(act_fn): SiLU()
|
| 29 |
+
)
|
| 30 |
+
(mlp_moe_gen): Qwen2MLP(
|
| 31 |
+
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 32 |
+
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 33 |
+
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 34 |
+
(act_fn): SiLU()
|
| 35 |
+
)
|
| 36 |
+
(input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 37 |
+
(input_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 38 |
+
(post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 39 |
+
(post_attention_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 40 |
+
)
|
| 41 |
+
)
|
| 42 |
+
)
|
| 43 |
+
)
|
| 44 |
+
(norm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 45 |
+
(norm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 46 |
+
(rotary_emb): Qwen2RotaryEmbedding()
|
| 47 |
+
)
|
| 48 |
+
(lm_head): Linear(in_features=3584, out_features=152064, bias=False)
|
| 49 |
+
)
|
| 50 |
+
(vit_model): SiglipVisionModel(
|
| 51 |
+
(vision_model): FullyShardedDataParallel(
|
| 52 |
+
(_fsdp_wrapped_module): SiglipVisionTransformer(
|
| 53 |
+
(embeddings): SiglipVisionEmbeddings(
|
| 54 |
+
(position_embedding): Embedding(4900, 1152)
|
| 55 |
+
(patch_embedding): Linear(in_features=588, out_features=1152, bias=True)
|
| 56 |
+
)
|
| 57 |
+
(encoder): SiglipEncoder(
|
| 58 |
+
(layers): ModuleList(
|
| 59 |
+
(0-25): 26 x FullyShardedDataParallel(
|
| 60 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 61 |
+
(_checkpoint_wrapped_module): SiglipEncoderLayer(
|
| 62 |
+
(self_attn): SiglipFlashAttention2(
|
| 63 |
+
(k_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 64 |
+
(v_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 65 |
+
(q_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 66 |
+
(out_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 67 |
+
)
|
| 68 |
+
(layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 69 |
+
(mlp): SiglipMLP(
|
| 70 |
+
(activation_fn): PytorchGELUTanh()
|
| 71 |
+
(fc1): Linear(in_features=1152, out_features=4304, bias=True)
|
| 72 |
+
(fc2): Linear(in_features=4304, out_features=1152, bias=True)
|
| 73 |
+
)
|
| 74 |
+
(layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 75 |
+
)
|
| 76 |
+
)
|
| 77 |
+
)
|
| 78 |
+
)
|
| 79 |
+
)
|
| 80 |
+
(post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 81 |
+
)
|
| 82 |
+
)
|
| 83 |
+
)
|
| 84 |
+
(connector): FullyShardedDataParallel(
|
| 85 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 86 |
+
(_checkpoint_wrapped_module): MLPconnector(
|
| 87 |
+
(activation_fn): PytorchGELUTanh()
|
| 88 |
+
(fc1): Linear(in_features=1152, out_features=3584, bias=True)
|
| 89 |
+
(fc2): Linear(in_features=3584, out_features=3584, bias=True)
|
| 90 |
+
)
|
| 91 |
+
)
|
| 92 |
+
)
|
| 93 |
+
(vit_pos_embed): FullyShardedDataParallel(
|
| 94 |
+
(_fsdp_wrapped_module): PositionEmbedding()
|
| 95 |
+
)
|
| 96 |
+
)
|
| 97 |
+
)
|
| 98 |
+
_flat_param True
|
| 99 |
+
language_model.model.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 100 |
+
language_model.model.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 101 |
+
language_model.model.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 102 |
+
language_model.model.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 103 |
+
language_model.model.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 104 |
+
language_model.model.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 105 |
+
language_model.model.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 106 |
+
language_model.model.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 107 |
+
language_model.model.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 108 |
+
language_model.model.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 109 |
+
language_model.model.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 110 |
+
language_model.model.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 111 |
+
language_model.model.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 112 |
+
language_model.model.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 113 |
+
language_model.model.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 114 |
+
language_model.model.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 115 |
+
language_model.model.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 116 |
+
language_model.model.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 117 |
+
language_model.model.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 118 |
+
language_model.model.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 119 |
+
language_model.model.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 120 |
+
language_model.model.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 121 |
+
language_model.model.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 122 |
+
language_model.model.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 123 |
+
language_model.model.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 124 |
+
language_model.model.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 125 |
+
language_model.model.layers.26._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 126 |
+
language_model.model.layers.27._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 127 |
+
vit_model.vision_model._fsdp_wrapped_module._flat_param True
|
| 128 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 129 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 130 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 131 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 132 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 133 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 134 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 135 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 136 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 137 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 138 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 139 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 140 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 141 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 142 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 143 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 144 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 145 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 146 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 147 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 148 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 149 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 150 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 151 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 152 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 153 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 154 |
+
connector._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 155 |
+
vit_pos_embed._fsdp_wrapped_module._flat_param False
|
| 156 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_train
|
| 157 |
+
Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10.json: [Errno 2] No such file or directory: '/home/jiaxin/bagel_train/hashes_test_set_v10.json'
|
| 158 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_val
|
| 159 |
+
Warning: failed loading hashes from /home/jiaxin/bagel_train/hashes_test_set_v10.json: [Errno 2] No such file or directory: '/home/jiaxin/bagel_train/hashes_test_set_v10.json'
|
| 160 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 161 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 162 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
|
|
| 1138 |
[[34m2025-12-30 04:42:35[39m] (step=0000968) Train Loss mse: 0.0000, Train Loss ce: 0.0645, Train Steps/Sec: 0.13,
|
| 1139 |
[[34m2025-12-30 04:42:43[39m] (step=0000969) Train Loss mse: 0.0000, Train Loss ce: 0.0641, Train Steps/Sec: 0.13,
|
| 1140 |
[[34m2025-12-30 04:42:51[39m] (step=0000970) Train Loss mse: 0.0000, Train Loss ce: 0.0653, Train Steps/Sec: 0.13,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1141 |
[[34m2025-12-30 04:42:58[39m] (step=0000971) Train Loss mse: 0.0000, Train Loss ce: 0.0651, Train Steps/Sec: 0.13,
|
| 1142 |
[[34m2025-12-30 04:43:06[39m] (step=0000972) Train Loss mse: 0.0000, Train Loss ce: 0.0663, Train Steps/Sec: 0.12,
|
| 1143 |
[[34m2025-12-30 04:43:14[39m] (step=0000973) Train Loss mse: 0.0000, Train Loss ce: 0.0629, Train Steps/Sec: 0.13,
|
|
|
|
| 3368 |
[[34m2025-12-30 09:37:59[39m] (step=0003195) Train Loss mse: 0.0000, Train Loss ce: 0.0602, Train Steps/Sec: 0.12,
|
| 3369 |
[[34m2025-12-30 09:38:06[39m] (step=0003196) Train Loss mse: 0.0000, Train Loss ce: 0.0614, Train Steps/Sec: 0.13,
|
| 3370 |
[[34m2025-12-30 09:38:14[39m] (step=0003197) Train Loss mse: 0.0000, Train Loss ce: 0.0616, Train Steps/Sec: 0.13,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3371 |
[[34m2025-12-30 09:48:45[39m] (step=0003277) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.13,
|
| 3372 |
[[34m2025-12-30 09:48:53[39m] (step=0003278) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.12,
|
| 3373 |
[[34m2025-12-30 09:49:01[39m] (step=0003279) Train Loss mse: 0.0000, Train Loss ce: 0.0611, Train Steps/Sec: 0.13,
|
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20251230_024203-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed-run0/files/output.log
CHANGED
|
@@ -1,160 +1,3 @@
|
|
| 1 |
-
FullyShardedDataParallel(
|
| 2 |
-
(_fsdp_wrapped_module): Bagel(
|
| 3 |
-
(language_model): Qwen2ForCausalLM(
|
| 4 |
-
(model): Qwen2Model(
|
| 5 |
-
(embed_tokens): Embedding(152064, 3584)
|
| 6 |
-
(layers): ModuleList(
|
| 7 |
-
(0-27): 28 x FullyShardedDataParallel(
|
| 8 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 9 |
-
(_checkpoint_wrapped_module): Qwen2MoTDecoderLayer(
|
| 10 |
-
(self_attn): PackedAttentionMoT(
|
| 11 |
-
(q_proj): Linear(in_features=3584, out_features=3584, bias=True)
|
| 12 |
-
(k_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 13 |
-
(v_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 14 |
-
(o_proj): Linear(in_features=3584, out_features=3584, bias=False)
|
| 15 |
-
(q_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 16 |
-
(k_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 17 |
-
(q_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 18 |
-
(k_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 19 |
-
(q_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=True)
|
| 20 |
-
(k_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 21 |
-
(v_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 22 |
-
(o_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=False)
|
| 23 |
-
)
|
| 24 |
-
(mlp): Qwen2MLP(
|
| 25 |
-
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 26 |
-
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 27 |
-
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 28 |
-
(act_fn): SiLU()
|
| 29 |
-
)
|
| 30 |
-
(mlp_moe_gen): Qwen2MLP(
|
| 31 |
-
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 32 |
-
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 33 |
-
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 34 |
-
(act_fn): SiLU()
|
| 35 |
-
)
|
| 36 |
-
(input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 37 |
-
(input_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 38 |
-
(post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 39 |
-
(post_attention_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 40 |
-
)
|
| 41 |
-
)
|
| 42 |
-
)
|
| 43 |
-
)
|
| 44 |
-
(norm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 45 |
-
(norm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 46 |
-
(rotary_emb): Qwen2RotaryEmbedding()
|
| 47 |
-
)
|
| 48 |
-
(lm_head): Linear(in_features=3584, out_features=152064, bias=False)
|
| 49 |
-
)
|
| 50 |
-
(vit_model): SiglipVisionModel(
|
| 51 |
-
(vision_model): FullyShardedDataParallel(
|
| 52 |
-
(_fsdp_wrapped_module): SiglipVisionTransformer(
|
| 53 |
-
(embeddings): SiglipVisionEmbeddings(
|
| 54 |
-
(position_embedding): Embedding(4900, 1152)
|
| 55 |
-
(patch_embedding): Linear(in_features=588, out_features=1152, bias=True)
|
| 56 |
-
)
|
| 57 |
-
(encoder): SiglipEncoder(
|
| 58 |
-
(layers): ModuleList(
|
| 59 |
-
(0-25): 26 x FullyShardedDataParallel(
|
| 60 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 61 |
-
(_checkpoint_wrapped_module): SiglipEncoderLayer(
|
| 62 |
-
(self_attn): SiglipFlashAttention2(
|
| 63 |
-
(k_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 64 |
-
(v_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 65 |
-
(q_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 66 |
-
(out_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 67 |
-
)
|
| 68 |
-
(layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 69 |
-
(mlp): SiglipMLP(
|
| 70 |
-
(activation_fn): PytorchGELUTanh()
|
| 71 |
-
(fc1): Linear(in_features=1152, out_features=4304, bias=True)
|
| 72 |
-
(fc2): Linear(in_features=4304, out_features=1152, bias=True)
|
| 73 |
-
)
|
| 74 |
-
(layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 75 |
-
)
|
| 76 |
-
)
|
| 77 |
-
)
|
| 78 |
-
)
|
| 79 |
-
)
|
| 80 |
-
(post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 81 |
-
)
|
| 82 |
-
)
|
| 83 |
-
)
|
| 84 |
-
(connector): FullyShardedDataParallel(
|
| 85 |
-
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 86 |
-
(_checkpoint_wrapped_module): MLPconnector(
|
| 87 |
-
(activation_fn): PytorchGELUTanh()
|
| 88 |
-
(fc1): Linear(in_features=1152, out_features=3584, bias=True)
|
| 89 |
-
(fc2): Linear(in_features=3584, out_features=3584, bias=True)
|
| 90 |
-
)
|
| 91 |
-
)
|
| 92 |
-
)
|
| 93 |
-
(vit_pos_embed): FullyShardedDataParallel(
|
| 94 |
-
(_fsdp_wrapped_module): PositionEmbedding()
|
| 95 |
-
)
|
| 96 |
-
)
|
| 97 |
-
)
|
| 98 |
-
_flat_param True
|
| 99 |
-
language_model.model.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 100 |
-
language_model.model.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 101 |
-
language_model.model.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 102 |
-
language_model.model.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 103 |
-
language_model.model.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 104 |
-
language_model.model.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 105 |
-
language_model.model.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 106 |
-
language_model.model.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 107 |
-
language_model.model.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 108 |
-
language_model.model.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 109 |
-
language_model.model.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 110 |
-
language_model.model.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 111 |
-
language_model.model.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 112 |
-
language_model.model.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 113 |
-
language_model.model.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 114 |
-
language_model.model.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 115 |
-
language_model.model.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 116 |
-
language_model.model.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 117 |
-
language_model.model.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 118 |
-
language_model.model.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 119 |
-
language_model.model.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 120 |
-
language_model.model.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 121 |
-
language_model.model.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 122 |
-
language_model.model.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 123 |
-
language_model.model.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 124 |
-
language_model.model.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 125 |
-
language_model.model.layers.26._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 126 |
-
language_model.model.layers.27._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 127 |
-
vit_model.vision_model._fsdp_wrapped_module._flat_param True
|
| 128 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 129 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 130 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 131 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 132 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 133 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 134 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 135 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 136 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 137 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 138 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 139 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 140 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 141 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 142 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 143 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 144 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 145 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 146 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 147 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 148 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 149 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 150 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 151 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 152 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 153 |
-
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 154 |
-
connector._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 155 |
-
vit_pos_embed._fsdp_wrapped_module._flat_param False
|
| 156 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_train
|
| 157 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_val
|
| 158 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 159 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 160 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
@@ -442,4 +285,161 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 442 |
[[34m2025-12-30 03:24:38[39m] (step=0000274) Train Loss mse: 0.0000, Train Loss ce: 0.0703, Train Steps/Sec: 0.13,
|
| 443 |
[[34m2025-12-30 03:24:46[39m] (step=0000275) Train Loss mse: 0.0000, Train Loss ce: 0.0741, Train Steps/Sec: 0.13,
|
| 444 |
[[34m2025-12-30 03:24:54[39m] (step=0000276) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.12,
|
| 445 |
-
[[34m2025-12-30 03:25:02[39m] (step=0000277) Train Loss mse: 0.0000, Train Loss ce: 0.0748, Train Steps/Sec: 0.13,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 2 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 3 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
|
|
| 285 |
[[34m2025-12-30 03:24:38[39m] (step=0000274) Train Loss mse: 0.0000, Train Loss ce: 0.0703, Train Steps/Sec: 0.13,
|
| 286 |
[[34m2025-12-30 03:24:46[39m] (step=0000275) Train Loss mse: 0.0000, Train Loss ce: 0.0741, Train Steps/Sec: 0.13,
|
| 287 |
[[34m2025-12-30 03:24:54[39m] (step=0000276) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.12,
|
| 288 |
+
[[34m2025-12-30 03:25:02[39m] (step=0000277) Train Loss mse: 0.0000, Train Loss ce: 0.0748, Train Steps/Sec: 0.13,
|
| 289 |
+
FullyShardedDataParallel(
|
| 290 |
+
(_fsdp_wrapped_module): Bagel(
|
| 291 |
+
(language_model): Qwen2ForCausalLM(
|
| 292 |
+
(model): Qwen2Model(
|
| 293 |
+
(embed_tokens): Embedding(152064, 3584)
|
| 294 |
+
(layers): ModuleList(
|
| 295 |
+
(0-27): 28 x FullyShardedDataParallel(
|
| 296 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 297 |
+
(_checkpoint_wrapped_module): Qwen2MoTDecoderLayer(
|
| 298 |
+
(self_attn): PackedAttentionMoT(
|
| 299 |
+
(q_proj): Linear(in_features=3584, out_features=3584, bias=True)
|
| 300 |
+
(k_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 301 |
+
(v_proj): Linear(in_features=3584, out_features=512, bias=True)
|
| 302 |
+
(o_proj): Linear(in_features=3584, out_features=3584, bias=False)
|
| 303 |
+
(q_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 304 |
+
(k_norm): Qwen2RMSNorm((128,), eps=1e-06)
|
| 305 |
+
(q_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 306 |
+
(k_norm_moe_gen): Qwen2RMSNorm((128,), eps=1e-06)
|
| 307 |
+
(q_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=True)
|
| 308 |
+
(k_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 309 |
+
(v_proj_moe_gen): Linear(in_features=3584, out_features=512, bias=True)
|
| 310 |
+
(o_proj_moe_gen): Linear(in_features=3584, out_features=3584, bias=False)
|
| 311 |
+
)
|
| 312 |
+
(mlp): Qwen2MLP(
|
| 313 |
+
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 314 |
+
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 315 |
+
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 316 |
+
(act_fn): SiLU()
|
| 317 |
+
)
|
| 318 |
+
(mlp_moe_gen): Qwen2MLP(
|
| 319 |
+
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 320 |
+
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
|
| 321 |
+
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
|
| 322 |
+
(act_fn): SiLU()
|
| 323 |
+
)
|
| 324 |
+
(input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 325 |
+
(input_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 326 |
+
(post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 327 |
+
(post_attention_layernorm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 328 |
+
)
|
| 329 |
+
)
|
| 330 |
+
)
|
| 331 |
+
)
|
| 332 |
+
(norm): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 333 |
+
(norm_moe_gen): Qwen2RMSNorm((3584,), eps=1e-06)
|
| 334 |
+
(rotary_emb): Qwen2RotaryEmbedding()
|
| 335 |
+
)
|
| 336 |
+
(lm_head): Linear(in_features=3584, out_features=152064, bias=False)
|
| 337 |
+
)
|
| 338 |
+
(vit_model): SiglipVisionModel(
|
| 339 |
+
(vision_model): FullyShardedDataParallel(
|
| 340 |
+
(_fsdp_wrapped_module): SiglipVisionTransformer(
|
| 341 |
+
(embeddings): SiglipVisionEmbeddings(
|
| 342 |
+
(position_embedding): Embedding(4900, 1152)
|
| 343 |
+
(patch_embedding): Linear(in_features=588, out_features=1152, bias=True)
|
| 344 |
+
)
|
| 345 |
+
(encoder): SiglipEncoder(
|
| 346 |
+
(layers): ModuleList(
|
| 347 |
+
(0-25): 26 x FullyShardedDataParallel(
|
| 348 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 349 |
+
(_checkpoint_wrapped_module): SiglipEncoderLayer(
|
| 350 |
+
(self_attn): SiglipFlashAttention2(
|
| 351 |
+
(k_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 352 |
+
(v_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 353 |
+
(q_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 354 |
+
(out_proj): Linear(in_features=1152, out_features=1152, bias=True)
|
| 355 |
+
)
|
| 356 |
+
(layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 357 |
+
(mlp): SiglipMLP(
|
| 358 |
+
(activation_fn): PytorchGELUTanh()
|
| 359 |
+
(fc1): Linear(in_features=1152, out_features=4304, bias=True)
|
| 360 |
+
(fc2): Linear(in_features=4304, out_features=1152, bias=True)
|
| 361 |
+
)
|
| 362 |
+
(layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 363 |
+
)
|
| 364 |
+
)
|
| 365 |
+
)
|
| 366 |
+
)
|
| 367 |
+
)
|
| 368 |
+
(post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
|
| 369 |
+
)
|
| 370 |
+
)
|
| 371 |
+
)
|
| 372 |
+
(connector): FullyShardedDataParallel(
|
| 373 |
+
(_fsdp_wrapped_module): CheckpointWrapper(
|
| 374 |
+
(_checkpoint_wrapped_module): MLPconnector(
|
| 375 |
+
(activation_fn): PytorchGELUTanh()
|
| 376 |
+
(fc1): Linear(in_features=1152, out_features=3584, bias=True)
|
| 377 |
+
(fc2): Linear(in_features=3584, out_features=3584, bias=True)
|
| 378 |
+
)
|
| 379 |
+
)
|
| 380 |
+
)
|
| 381 |
+
(vit_pos_embed): FullyShardedDataParallel(
|
| 382 |
+
(_fsdp_wrapped_module): PositionEmbedding()
|
| 383 |
+
)
|
| 384 |
+
)
|
| 385 |
+
)
|
| 386 |
+
_flat_param True
|
| 387 |
+
language_model.model.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 388 |
+
language_model.model.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 389 |
+
language_model.model.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 390 |
+
language_model.model.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 391 |
+
language_model.model.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 392 |
+
language_model.model.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 393 |
+
language_model.model.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 394 |
+
language_model.model.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 395 |
+
language_model.model.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 396 |
+
language_model.model.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 397 |
+
language_model.model.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 398 |
+
language_model.model.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 399 |
+
language_model.model.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 400 |
+
language_model.model.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 401 |
+
language_model.model.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 402 |
+
language_model.model.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 403 |
+
language_model.model.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 404 |
+
language_model.model.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 405 |
+
language_model.model.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 406 |
+
language_model.model.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 407 |
+
language_model.model.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 408 |
+
language_model.model.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 409 |
+
language_model.model.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 410 |
+
language_model.model.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 411 |
+
language_model.model.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 412 |
+
language_model.model.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 413 |
+
language_model.model.layers.26._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 414 |
+
language_model.model.layers.27._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 415 |
+
vit_model.vision_model._fsdp_wrapped_module._flat_param True
|
| 416 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.0._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 417 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.1._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 418 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.2._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 419 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.3._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 420 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.4._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 421 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.5._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 422 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.6._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 423 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.7._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 424 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.8._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 425 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.9._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 426 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.10._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 427 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.11._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 428 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.12._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 429 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.13._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 430 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.14._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 431 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.15._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 432 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.16._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 433 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.17._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 434 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.18._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 435 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.19._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 436 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.20._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 437 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.21._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 438 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.22._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 439 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.23._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 440 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.24._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 441 |
+
vit_model.vision_model._fsdp_wrapped_module.encoder.layers.25._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 442 |
+
connector._fsdp_wrapped_module._checkpoint_wrapped_module._flat_param True
|
| 443 |
+
vit_pos_embed._fsdp_wrapped_module._flat_param False
|
| 444 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_train
|
| 445 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_junyi/vlm_gym_jigsaw_val
|
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20260105_043345-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed-run0/files/output.log
CHANGED
|
@@ -25484,835 +25484,4 @@ vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
|
| 25484 |
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25485 |
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25486 |
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25487 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25488 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25489 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25490 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25491 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25492 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25493 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25494 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25495 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25496 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25497 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25498 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25499 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25500 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25501 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25502 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25503 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25504 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25505 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25506 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25507 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25508 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25509 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25510 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25511 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25512 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25513 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25514 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25515 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25516 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25517 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25518 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25519 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25520 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25521 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25522 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25523 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25524 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25525 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25526 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25527 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25528 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25529 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25530 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25531 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25532 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25533 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25534 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25535 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25536 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25537 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25538 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25539 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25540 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25541 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25542 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25543 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25544 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25545 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25546 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25547 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25548 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25549 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25550 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25551 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25552 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25553 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25554 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25555 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25556 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25557 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25558 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25559 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25560 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25561 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25562 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25563 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25564 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25565 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25566 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25567 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25568 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25569 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25570 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25571 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25572 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25573 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25574 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25575 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25576 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25577 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25578 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25579 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25580 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25581 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25582 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25583 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25584 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25585 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25586 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25587 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25588 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25589 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25590 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25591 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25592 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25593 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25594 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25595 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25596 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25597 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25598 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25599 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25600 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25601 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25602 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25603 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25604 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25605 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25606 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25607 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25608 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25609 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25610 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25611 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25612 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25613 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25614 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25615 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25616 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25617 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25618 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25619 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25620 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25621 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25622 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25623 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25624 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25625 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25626 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25627 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25628 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25629 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25630 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25631 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25632 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25633 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25634 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25635 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25636 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25637 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25638 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25639 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25640 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25641 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25642 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25643 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25644 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25645 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25646 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25647 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25648 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25649 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25650 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25651 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25652 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25653 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25654 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25655 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25656 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25657 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25658 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25659 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25660 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25661 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25662 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25663 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25664 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25665 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25666 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25667 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25668 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25669 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25670 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25671 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25672 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25673 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25674 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25675 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25676 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25677 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25678 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25679 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25680 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25681 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25682 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25683 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25684 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25685 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25686 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25687 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25688 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25689 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25690 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25691 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25692 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25693 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25694 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25695 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25696 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25697 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25698 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25699 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25700 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25701 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25702 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25703 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25704 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25705 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25706 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25707 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25708 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25709 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25710 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25711 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25712 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25713 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25714 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25715 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25716 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25717 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25718 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25719 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25720 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25721 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25722 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25723 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25724 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25725 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25726 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25727 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25728 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25729 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25730 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25731 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25732 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25733 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25734 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25735 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25736 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25737 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25738 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25739 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25740 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25741 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25742 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25743 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25744 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25745 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25746 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25747 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25748 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25749 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25750 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25751 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25752 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25753 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25754 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25755 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25756 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25757 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25758 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25759 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25760 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25761 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25762 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25763 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25764 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25765 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25766 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25767 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25768 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25769 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25770 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25771 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25772 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25773 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25774 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25775 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25776 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25777 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25778 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25779 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25780 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25781 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25782 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25783 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25784 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25785 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25786 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25787 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25788 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25789 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25790 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25791 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25792 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25793 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25794 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25795 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25796 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25797 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25798 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25799 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25800 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25801 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25802 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25803 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25804 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25805 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25806 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25807 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25808 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25809 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25810 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25811 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25812 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25813 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25814 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25815 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25816 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25817 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25818 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25819 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25820 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25821 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25822 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25823 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25824 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25825 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25826 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25827 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25828 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25829 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25830 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25831 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25832 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25833 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25834 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25835 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25836 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25837 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25838 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25839 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25840 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25841 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25842 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25843 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25844 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25845 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25846 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25847 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25848 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25849 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25850 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25851 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25852 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25853 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25854 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25855 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25856 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25857 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25858 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25859 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25860 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25861 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25862 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25863 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25864 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25865 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25866 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25867 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25868 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25869 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25870 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25871 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25872 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25873 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25874 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25875 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25876 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25877 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25878 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25879 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25880 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25881 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25882 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25883 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25884 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25885 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25886 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25887 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25888 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25889 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25890 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25891 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25892 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25893 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25894 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25895 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25896 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25897 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25898 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25899 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25900 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25901 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25902 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25903 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25904 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25905 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25906 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25907 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25908 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25909 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25910 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25911 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25912 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25913 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25914 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25915 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25916 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25917 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25918 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25919 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25920 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25921 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25922 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25923 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25924 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25925 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25926 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25927 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25928 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25929 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25930 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25931 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25932 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25933 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25934 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25935 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25936 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25937 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25938 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25939 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25940 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25941 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25942 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25943 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25944 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25945 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25946 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25947 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25948 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25949 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25950 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25951 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25952 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25953 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25954 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25955 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25956 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25957 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25958 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25959 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25960 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25961 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25962 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25963 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25964 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25965 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25966 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25967 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25968 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25969 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25970 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25971 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25972 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25973 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25974 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25975 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25976 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25977 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25978 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25979 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25980 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25981 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25982 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25983 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25984 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25985 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25986 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25987 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25988 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25989 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25990 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25991 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25992 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25993 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25994 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25995 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25996 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25997 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25998 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25999 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26000 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26001 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26002 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26003 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26004 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26005 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26006 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26007 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26008 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26009 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26010 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26011 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26012 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26013 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26014 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26015 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26016 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26017 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26018 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26019 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26020 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26021 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26022 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26023 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26024 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26025 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26026 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26027 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26028 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26029 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26030 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26031 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26032 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26033 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26034 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26035 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26036 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26037 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26038 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26039 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26040 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26041 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26042 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26043 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26044 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26045 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26046 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26047 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26048 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26049 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26050 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26051 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26052 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26053 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26054 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26055 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26056 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26057 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26058 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26059 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26060 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26061 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26062 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26063 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26064 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26065 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26066 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26067 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26068 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26069 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26070 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26071 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26072 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26073 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26074 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26075 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26076 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26077 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26078 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26079 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26080 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26081 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26082 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26083 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26084 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26085 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26086 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26087 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26088 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26089 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26090 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26091 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26092 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26093 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26094 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26095 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26096 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26097 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26098 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26099 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26100 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26101 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26102 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26103 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26104 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26105 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26106 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26107 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26108 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26109 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26110 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26111 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26112 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26113 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26114 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26115 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26116 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26117 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26118 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26119 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26120 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26121 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26122 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26123 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26124 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26125 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26126 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26127 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26128 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26129 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26130 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26131 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26132 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26133 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26134 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26135 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26136 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26137 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26138 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26139 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26140 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26141 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26142 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26143 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26144 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26145 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26146 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26147 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26148 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26149 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26150 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26151 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26152 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26153 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26154 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26155 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26156 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26157 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26158 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26159 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26160 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26161 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26162 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26163 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26164 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26165 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26166 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26167 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26168 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26169 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26170 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26171 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26172 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26173 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26174 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26175 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26176 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26177 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26178 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26179 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26180 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26181 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26182 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26183 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26184 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26185 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26186 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26187 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26188 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26189 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26190 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26191 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26192 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26193 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26194 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26195 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26196 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26197 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26198 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26199 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26200 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26201 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26202 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26203 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26204 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26205 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26206 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26207 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26208 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26209 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26210 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26211 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26212 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26213 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26214 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26215 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26216 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26217 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26218 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26219 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26220 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26221 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26222 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26223 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26224 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26225 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26226 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26227 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26228 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26229 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26230 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26231 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26232 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26233 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26234 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26235 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26236 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26237 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26238 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26239 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26240 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26241 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26242 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26243 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26244 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26245 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26246 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26247 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26248 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26249 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26250 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26251 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26252 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26253 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26254 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26255 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26256 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26257 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26258 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26259 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26260 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26261 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26262 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26263 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26264 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26265 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26266 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26267 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26268 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26269 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26270 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26271 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26272 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26273 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26274 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26275 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26276 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26277 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26278 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26279 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26280 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26281 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26282 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26283 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26284 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26285 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26286 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26287 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26288 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26289 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26290 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26291 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26292 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26293 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26294 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26295 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26296 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26297 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26298 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26299 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26300 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26301 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26302 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26303 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26304 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26305 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26306 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26307 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26308 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26309 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26310 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26311 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26312 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26313 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26314 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26315 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26316 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26317 |
-
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 26318 |
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
|
|
|
| 25484 |
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25485 |
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
| 25486 |
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25487 |
vlm_gym_jigsaw_celoss_no_mse_junyi repeat in rank-0 worker-0
|
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/wandb/offline-run-20260107_185004-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed-run0/files/output.log
CHANGED
|
@@ -168,13 +168,6 @@ Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
|
| 168 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 169 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 170 |
ce_avg: 0.06184878200292587, mse_avg: 0.0
|
| 171 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step1000
|
| 172 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 173 |
-
[eval debug] first 3 batch fingerprints:
|
| 174 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 175 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 176 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 177 |
-
ce_avg: 0.08925717324018478, mse_avg: 0.0
|
| 178 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 179 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 180 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
@@ -1089,20 +1082,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1089 |
[[34m2026-01-07 21:00:31[39m] (step=0000901) Train Loss mse: 0.0000, Train Loss ce: 0.0586, Train Steps/Sec: 0.12,
|
| 1090 |
[[34m2026-01-07 21:00:39[39m] (step=0000902) Train Loss mse: 0.0000, Train Loss ce: 0.0604, Train Steps/Sec: 0.12,
|
| 1091 |
[[34m2026-01-07 21:00:47[39m] (step=0000903) Train Loss mse: 0.0000, Train Loss ce: 0.0591, Train Steps/Sec: 0.12,
|
| 1092 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step1500
|
| 1093 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 1094 |
-
[eval debug] first 3 batch fingerprints:
|
| 1095 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1096 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1097 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1098 |
-
ce_avg: 0.1071508377790451, mse_avg: 0.0
|
| 1099 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step2000
|
| 1100 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 1101 |
-
[eval debug] first 3 batch fingerprints:
|
| 1102 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1103 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1104 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1105 |
-
ce_avg: 0.1155419573187828, mse_avg: 0.0
|
| 1106 |
[[34m2026-01-07 21:00:55[39m] (step=0000904) Train Loss mse: 0.0000, Train Loss ce: 0.0592, Train Steps/Sec: 0.12,
|
| 1107 |
[[34m2026-01-07 21:01:04[39m] (step=0000905) Train Loss mse: 0.0000, Train Loss ce: 0.0595, Train Steps/Sec: 0.12,
|
| 1108 |
[[34m2026-01-07 21:01:12[39m] (step=0000906) Train Loss mse: 0.0000, Train Loss ce: 0.0606, Train Steps/Sec: 0.12,
|
|
@@ -1155,6 +1134,20 @@ ce_avg: 0.1155419573187828, mse_avg: 0.0
|
|
| 1155 |
[[34m2026-01-07 21:07:39[39m] (step=0000953) Train Loss mse: 0.0000, Train Loss ce: 0.0580, Train Steps/Sec: 0.12,
|
| 1156 |
[[34m2026-01-07 21:07:47[39m] (step=0000954) Train Loss mse: 0.0000, Train Loss ce: 0.0604, Train Steps/Sec: 0.12,
|
| 1157 |
[[34m2026-01-07 21:07:55[39m] (step=0000955) Train Loss mse: 0.0000, Train Loss ce: 0.0597, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1158 |
[[34m2026-01-07 21:08:03[39m] (step=0000956) Train Loss mse: 0.0000, Train Loss ce: 0.0595, Train Steps/Sec: 0.12,
|
| 1159 |
[[34m2026-01-07 21:08:12[39m] (step=0000957) Train Loss mse: 0.0000, Train Loss ce: 0.0602, Train Steps/Sec: 0.12,
|
| 1160 |
[[34m2026-01-07 21:08:20[39m] (step=0000958) Train Loss mse: 0.0000, Train Loss ce: 0.0602, Train Steps/Sec: 0.12,
|
|
@@ -2406,6 +2399,27 @@ ce_avg: 0.1155419573187828, mse_avg: 0.0
|
|
| 2406 |
[[34m2026-01-07 23:59:46[39m] (step=0002204) Train Loss mse: 0.0000, Train Loss ce: 0.0569, Train Steps/Sec: 0.12,
|
| 2407 |
[[34m2026-01-07 23:59:54[39m] (step=0002205) Train Loss mse: 0.0000, Train Loss ce: 0.0562, Train Steps/Sec: 0.12,
|
| 2408 |
[[34m2026-01-08 00:00:02[39m] (step=0002206) Train Loss mse: 0.0000, Train Loss ce: 0.0587, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2409 |
[[34m2026-01-08 00:00:10[39m] (step=0002207) Train Loss mse: 0.0000, Train Loss ce: 0.0568, Train Steps/Sec: 0.12,
|
| 2410 |
[[34m2026-01-08 00:00:19[39m] (step=0002208) Train Loss mse: 0.0000, Train Loss ce: 0.0573, Train Steps/Sec: 0.12,
|
| 2411 |
[[34m2026-01-08 00:00:27[39m] (step=0002209) Train Loss mse: 0.0000, Train Loss ce: 0.0578, Train Steps/Sec: 0.12,
|
|
@@ -2470,20 +2484,6 @@ ce_avg: 0.1155419573187828, mse_avg: 0.0
|
|
| 2470 |
[[34m2026-01-08 00:08:33[39m] (step=0002268) Train Loss mse: 0.0000, Train Loss ce: 0.0577, Train Steps/Sec: 0.12,
|
| 2471 |
[[34m2026-01-08 00:08:41[39m] (step=0002269) Train Loss mse: 0.0000, Train Loss ce: 0.0576, Train Steps/Sec: 0.12,
|
| 2472 |
[[34m2026-01-08 00:08:49[39m] (step=0002270) Train Loss mse: 0.0000, Train Loss ce: 0.0572, Train Steps/Sec: 0.12,
|
| 2473 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step2500
|
| 2474 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 2475 |
-
[eval debug] first 3 batch fingerprints:
|
| 2476 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2477 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2478 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2479 |
-
ce_avg: 0.12304135411977768, mse_avg: 0.0
|
| 2480 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step3000
|
| 2481 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 2482 |
-
[eval debug] first 3 batch fingerprints:
|
| 2483 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2484 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2485 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2486 |
-
ce_avg: 0.05864739045500755, mse_avg: 0.0
|
| 2487 |
[[34m2026-01-08 00:08:58[39m] (step=0002271) Train Loss mse: 0.0000, Train Loss ce: 0.0576, Train Steps/Sec: 0.12,
|
| 2488 |
[[34m2026-01-08 00:09:06[39m] (step=0002272) Train Loss mse: 0.0000, Train Loss ce: 0.0577, Train Steps/Sec: 0.12,
|
| 2489 |
[[34m2026-01-08 00:09:14[39m] (step=0002273) Train Loss mse: 0.0000, Train Loss ce: 0.0565, Train Steps/Sec: 0.12,
|
|
@@ -3289,6 +3289,20 @@ ce_avg: 0.05864739045500755, mse_avg: 0.0
|
|
| 3289 |
[[34m2026-01-08 02:01:24[39m] (step=0003070) Train Loss mse: 0.0000, Train Loss ce: 0.0575, Train Steps/Sec: 0.12,
|
| 3290 |
[[34m2026-01-08 02:01:32[39m] (step=0003071) Train Loss mse: 0.0000, Train Loss ce: 0.0564, Train Steps/Sec: 0.12,
|
| 3291 |
[[34m2026-01-08 02:01:41[39m] (step=0003072) Train Loss mse: 0.0000, Train Loss ce: 0.0571, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3292 |
[[34m2026-01-08 02:01:49[39m] (step=0003073) Train Loss mse: 0.0000, Train Loss ce: 0.0556, Train Steps/Sec: 0.12,
|
| 3293 |
[[34m2026-01-08 02:01:57[39m] (step=0003074) Train Loss mse: 0.0000, Train Loss ce: 0.0567, Train Steps/Sec: 0.12,
|
| 3294 |
[[34m2026-01-08 02:02:05[39m] (step=0003075) Train Loss mse: 0.0000, Train Loss ce: 0.0563, Train Steps/Sec: 0.12,
|
|
@@ -3448,27 +3462,6 @@ ce_avg: 0.05864739045500755, mse_avg: 0.0
|
|
| 3448 |
[[34m2026-01-08 02:23:13[39m] (step=0003229) Train Loss mse: 0.0000, Train Loss ce: 0.0575, Train Steps/Sec: 0.12,
|
| 3449 |
[[34m2026-01-08 02:23:21[39m] (step=0003230) Train Loss mse: 0.0000, Train Loss ce: 0.0566, Train Steps/Sec: 0.12,
|
| 3450 |
[[34m2026-01-08 02:23:29[39m] (step=0003231) Train Loss mse: 0.0000, Train Loss ce: 0.0567, Train Steps/Sec: 0.12,
|
| 3451 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step3500
|
| 3452 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3453 |
-
[eval debug] first 3 batch fingerprints:
|
| 3454 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3455 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3456 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3457 |
-
ce_avg: 0.058637309819459915, mse_avg: 0.0
|
| 3458 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step4000
|
| 3459 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3460 |
-
[eval debug] first 3 batch fingerprints:
|
| 3461 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3462 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3463 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3464 |
-
ce_avg: 0.059058357030153275, mse_avg: 0.0
|
| 3465 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step4500
|
| 3466 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3467 |
-
[eval debug] first 3 batch fingerprints:
|
| 3468 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3469 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3470 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3471 |
-
ce_avg: 0.05962677672505379, mse_avg: 0.0
|
| 3472 |
[[34m2026-01-08 02:23:38[39m] (step=0003232) Train Loss mse: 0.0000, Train Loss ce: 0.0555, Train Steps/Sec: 0.12,
|
| 3473 |
[[34m2026-01-08 02:23:46[39m] (step=0003233) Train Loss mse: 0.0000, Train Loss ce: 0.0562, Train Steps/Sec: 0.12,
|
| 3474 |
[[34m2026-01-08 02:23:54[39m] (step=0003234) Train Loss mse: 0.0000, Train Loss ce: 0.0572, Train Steps/Sec: 0.12,
|
|
@@ -4641,6 +4634,20 @@ ce_avg: 0.05962677672505379, mse_avg: 0.0
|
|
| 4641 |
[[34m2026-01-08 05:04:14[39m] (step=0004401) Train Loss mse: 0.0000, Train Loss ce: 0.0541, Train Steps/Sec: 0.12,
|
| 4642 |
[[34m2026-01-08 05:04:23[39m] (step=0004402) Train Loss mse: 0.0000, Train Loss ce: 0.0553, Train Steps/Sec: 0.12,
|
| 4643 |
[[34m2026-01-08 05:04:31[39m] (step=0004403) Train Loss mse: 0.0000, Train Loss ce: 0.0548, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4644 |
[[34m2026-01-08 05:04:39[39m] (step=0004404) Train Loss mse: 0.0000, Train Loss ce: 0.0545, Train Steps/Sec: 0.13,
|
| 4645 |
[[34m2026-01-08 05:04:47[39m] (step=0004405) Train Loss mse: 0.0000, Train Loss ce: 0.0554, Train Steps/Sec: 0.12,
|
| 4646 |
[[34m2026-01-08 05:04:55[39m] (step=0004406) Train Loss mse: 0.0000, Train Loss ce: 0.0546, Train Steps/Sec: 0.12,
|
|
@@ -4851,13 +4858,6 @@ ce_avg: 0.05962677672505379, mse_avg: 0.0
|
|
| 4851 |
[[34m2026-01-08 05:33:09[39m] (step=0004611) Train Loss mse: 0.0000, Train Loss ce: 0.0549, Train Steps/Sec: 0.12,
|
| 4852 |
[[34m2026-01-08 05:33:18[39m] (step=0004612) Train Loss mse: 0.0000, Train Loss ce: 0.0548, Train Steps/Sec: 0.12,
|
| 4853 |
[[34m2026-01-08 05:33:26[39m] (step=0004613) Train Loss mse: 0.0000, Train Loss ce: 0.0545, Train Steps/Sec: 0.12,
|
| 4854 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step5000
|
| 4855 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 4856 |
-
[eval debug] first 3 batch fingerprints:
|
| 4857 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4858 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4859 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4860 |
-
ce_avg: 0.060032669454813004, mse_avg: 0.0
|
| 4861 |
[[34m2026-01-08 05:33:34[39m] (step=0004614) Train Loss mse: 0.0000, Train Loss ce: 0.0549, Train Steps/Sec: 0.12,
|
| 4862 |
[[34m2026-01-08 05:33:42[39m] (step=0004615) Train Loss mse: 0.0000, Train Loss ce: 0.0540, Train Steps/Sec: 0.12,
|
| 4863 |
[[34m2026-01-08 05:33:51[39m] (step=0004616) Train Loss mse: 0.0000, Train Loss ce: 0.0555, Train Steps/Sec: 0.12,
|
|
|
|
| 168 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 169 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 170 |
ce_avg: 0.06184878200292587, mse_avg: 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 172 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 173 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
|
|
| 1082 |
[[34m2026-01-07 21:00:31[39m] (step=0000901) Train Loss mse: 0.0000, Train Loss ce: 0.0586, Train Steps/Sec: 0.12,
|
| 1083 |
[[34m2026-01-07 21:00:39[39m] (step=0000902) Train Loss mse: 0.0000, Train Loss ce: 0.0604, Train Steps/Sec: 0.12,
|
| 1084 |
[[34m2026-01-07 21:00:47[39m] (step=0000903) Train Loss mse: 0.0000, Train Loss ce: 0.0591, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1085 |
[[34m2026-01-07 21:00:55[39m] (step=0000904) Train Loss mse: 0.0000, Train Loss ce: 0.0592, Train Steps/Sec: 0.12,
|
| 1086 |
[[34m2026-01-07 21:01:04[39m] (step=0000905) Train Loss mse: 0.0000, Train Loss ce: 0.0595, Train Steps/Sec: 0.12,
|
| 1087 |
[[34m2026-01-07 21:01:12[39m] (step=0000906) Train Loss mse: 0.0000, Train Loss ce: 0.0606, Train Steps/Sec: 0.12,
|
|
|
|
| 1134 |
[[34m2026-01-07 21:07:39[39m] (step=0000953) Train Loss mse: 0.0000, Train Loss ce: 0.0580, Train Steps/Sec: 0.12,
|
| 1135 |
[[34m2026-01-07 21:07:47[39m] (step=0000954) Train Loss mse: 0.0000, Train Loss ce: 0.0604, Train Steps/Sec: 0.12,
|
| 1136 |
[[34m2026-01-07 21:07:55[39m] (step=0000955) Train Loss mse: 0.0000, Train Loss ce: 0.0597, Train Steps/Sec: 0.12,
|
| 1137 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step1000
|
| 1138 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 1139 |
+
[eval debug] first 3 batch fingerprints:
|
| 1140 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1141 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1142 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1143 |
+
ce_avg: 0.08925717324018478, mse_avg: 0.0
|
| 1144 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step1500
|
| 1145 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 1146 |
+
[eval debug] first 3 batch fingerprints:
|
| 1147 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1148 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1149 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1150 |
+
ce_avg: 0.1071508377790451, mse_avg: 0.0
|
| 1151 |
[[34m2026-01-07 21:08:03[39m] (step=0000956) Train Loss mse: 0.0000, Train Loss ce: 0.0595, Train Steps/Sec: 0.12,
|
| 1152 |
[[34m2026-01-07 21:08:12[39m] (step=0000957) Train Loss mse: 0.0000, Train Loss ce: 0.0602, Train Steps/Sec: 0.12,
|
| 1153 |
[[34m2026-01-07 21:08:20[39m] (step=0000958) Train Loss mse: 0.0000, Train Loss ce: 0.0602, Train Steps/Sec: 0.12,
|
|
|
|
| 2399 |
[[34m2026-01-07 23:59:46[39m] (step=0002204) Train Loss mse: 0.0000, Train Loss ce: 0.0569, Train Steps/Sec: 0.12,
|
| 2400 |
[[34m2026-01-07 23:59:54[39m] (step=0002205) Train Loss mse: 0.0000, Train Loss ce: 0.0562, Train Steps/Sec: 0.12,
|
| 2401 |
[[34m2026-01-08 00:00:02[39m] (step=0002206) Train Loss mse: 0.0000, Train Loss ce: 0.0587, Train Steps/Sec: 0.12,
|
| 2402 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step2000
|
| 2403 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 2404 |
+
[eval debug] first 3 batch fingerprints:
|
| 2405 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2406 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2407 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2408 |
+
ce_avg: 0.1155419573187828, mse_avg: 0.0
|
| 2409 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step2500
|
| 2410 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 2411 |
+
[eval debug] first 3 batch fingerprints:
|
| 2412 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2413 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2414 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2415 |
+
ce_avg: 0.12304135411977768, mse_avg: 0.0
|
| 2416 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step3000
|
| 2417 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 2418 |
+
[eval debug] first 3 batch fingerprints:
|
| 2419 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2420 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2421 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2422 |
+
ce_avg: 0.05864739045500755, mse_avg: 0.0
|
| 2423 |
[[34m2026-01-08 00:00:10[39m] (step=0002207) Train Loss mse: 0.0000, Train Loss ce: 0.0568, Train Steps/Sec: 0.12,
|
| 2424 |
[[34m2026-01-08 00:00:19[39m] (step=0002208) Train Loss mse: 0.0000, Train Loss ce: 0.0573, Train Steps/Sec: 0.12,
|
| 2425 |
[[34m2026-01-08 00:00:27[39m] (step=0002209) Train Loss mse: 0.0000, Train Loss ce: 0.0578, Train Steps/Sec: 0.12,
|
|
|
|
| 2484 |
[[34m2026-01-08 00:08:33[39m] (step=0002268) Train Loss mse: 0.0000, Train Loss ce: 0.0577, Train Steps/Sec: 0.12,
|
| 2485 |
[[34m2026-01-08 00:08:41[39m] (step=0002269) Train Loss mse: 0.0000, Train Loss ce: 0.0576, Train Steps/Sec: 0.12,
|
| 2486 |
[[34m2026-01-08 00:08:49[39m] (step=0002270) Train Loss mse: 0.0000, Train Loss ce: 0.0572, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2487 |
[[34m2026-01-08 00:08:58[39m] (step=0002271) Train Loss mse: 0.0000, Train Loss ce: 0.0576, Train Steps/Sec: 0.12,
|
| 2488 |
[[34m2026-01-08 00:09:06[39m] (step=0002272) Train Loss mse: 0.0000, Train Loss ce: 0.0577, Train Steps/Sec: 0.12,
|
| 2489 |
[[34m2026-01-08 00:09:14[39m] (step=0002273) Train Loss mse: 0.0000, Train Loss ce: 0.0565, Train Steps/Sec: 0.12,
|
|
|
|
| 3289 |
[[34m2026-01-08 02:01:24[39m] (step=0003070) Train Loss mse: 0.0000, Train Loss ce: 0.0575, Train Steps/Sec: 0.12,
|
| 3290 |
[[34m2026-01-08 02:01:32[39m] (step=0003071) Train Loss mse: 0.0000, Train Loss ce: 0.0564, Train Steps/Sec: 0.12,
|
| 3291 |
[[34m2026-01-08 02:01:41[39m] (step=0003072) Train Loss mse: 0.0000, Train Loss ce: 0.0571, Train Steps/Sec: 0.12,
|
| 3292 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step3500
|
| 3293 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3294 |
+
[eval debug] first 3 batch fingerprints:
|
| 3295 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3296 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3297 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3298 |
+
ce_avg: 0.058637309819459915, mse_avg: 0.0
|
| 3299 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step4000
|
| 3300 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3301 |
+
[eval debug] first 3 batch fingerprints:
|
| 3302 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3303 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3304 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3305 |
+
ce_avg: 0.059058357030153275, mse_avg: 0.0
|
| 3306 |
[[34m2026-01-08 02:01:49[39m] (step=0003073) Train Loss mse: 0.0000, Train Loss ce: 0.0556, Train Steps/Sec: 0.12,
|
| 3307 |
[[34m2026-01-08 02:01:57[39m] (step=0003074) Train Loss mse: 0.0000, Train Loss ce: 0.0567, Train Steps/Sec: 0.12,
|
| 3308 |
[[34m2026-01-08 02:02:05[39m] (step=0003075) Train Loss mse: 0.0000, Train Loss ce: 0.0563, Train Steps/Sec: 0.12,
|
|
|
|
| 3462 |
[[34m2026-01-08 02:23:13[39m] (step=0003229) Train Loss mse: 0.0000, Train Loss ce: 0.0575, Train Steps/Sec: 0.12,
|
| 3463 |
[[34m2026-01-08 02:23:21[39m] (step=0003230) Train Loss mse: 0.0000, Train Loss ce: 0.0566, Train Steps/Sec: 0.12,
|
| 3464 |
[[34m2026-01-08 02:23:29[39m] (step=0003231) Train Loss mse: 0.0000, Train Loss ce: 0.0567, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3465 |
[[34m2026-01-08 02:23:38[39m] (step=0003232) Train Loss mse: 0.0000, Train Loss ce: 0.0555, Train Steps/Sec: 0.12,
|
| 3466 |
[[34m2026-01-08 02:23:46[39m] (step=0003233) Train Loss mse: 0.0000, Train Loss ce: 0.0562, Train Steps/Sec: 0.12,
|
| 3467 |
[[34m2026-01-08 02:23:54[39m] (step=0003234) Train Loss mse: 0.0000, Train Loss ce: 0.0572, Train Steps/Sec: 0.12,
|
|
|
|
| 4634 |
[[34m2026-01-08 05:04:14[39m] (step=0004401) Train Loss mse: 0.0000, Train Loss ce: 0.0541, Train Steps/Sec: 0.12,
|
| 4635 |
[[34m2026-01-08 05:04:23[39m] (step=0004402) Train Loss mse: 0.0000, Train Loss ce: 0.0553, Train Steps/Sec: 0.12,
|
| 4636 |
[[34m2026-01-08 05:04:31[39m] (step=0004403) Train Loss mse: 0.0000, Train Loss ce: 0.0548, Train Steps/Sec: 0.12,
|
| 4637 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step4500
|
| 4638 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 4639 |
+
[eval debug] first 3 batch fingerprints:
|
| 4640 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4641 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4642 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4643 |
+
ce_avg: 0.05962677672505379, mse_avg: 0.0
|
| 4644 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema993_hashed_step5000
|
| 4645 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 4646 |
+
[eval debug] first 3 batch fingerprints:
|
| 4647 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4648 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4649 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4650 |
+
ce_avg: 0.060032669454813004, mse_avg: 0.0
|
| 4651 |
[[34m2026-01-08 05:04:39[39m] (step=0004404) Train Loss mse: 0.0000, Train Loss ce: 0.0545, Train Steps/Sec: 0.13,
|
| 4652 |
[[34m2026-01-08 05:04:47[39m] (step=0004405) Train Loss mse: 0.0000, Train Loss ce: 0.0554, Train Steps/Sec: 0.12,
|
| 4653 |
[[34m2026-01-08 05:04:55[39m] (step=0004406) Train Loss mse: 0.0000, Train Loss ce: 0.0546, Train Steps/Sec: 0.12,
|
|
|
|
| 4858 |
[[34m2026-01-08 05:33:09[39m] (step=0004611) Train Loss mse: 0.0000, Train Loss ce: 0.0549, Train Steps/Sec: 0.12,
|
| 4859 |
[[34m2026-01-08 05:33:18[39m] (step=0004612) Train Loss mse: 0.0000, Train Loss ce: 0.0548, Train Steps/Sec: 0.12,
|
| 4860 |
[[34m2026-01-08 05:33:26[39m] (step=0004613) Train Loss mse: 0.0000, Train Loss ce: 0.0545, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4861 |
[[34m2026-01-08 05:33:34[39m] (step=0004614) Train Loss mse: 0.0000, Train Loss ce: 0.0549, Train Steps/Sec: 0.12,
|
| 4862 |
[[34m2026-01-08 05:33:42[39m] (step=0004615) Train Loss mse: 0.0000, Train Loss ce: 0.0540, Train Steps/Sec: 0.12,
|
| 4863 |
[[34m2026-01-08 05:33:51[39m] (step=0004616) Train Loss mse: 0.0000, Train Loss ce: 0.0555, Train Steps/Sec: 0.12,
|