Tsaisplus committed
Commit 956d8d2 · verified · 1 Parent(s): 7571c51

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +5 -0
  2. added_tokens.json +36 -0
  3. all_results.json +8 -0
  4. config.json +237 -0
  5. generation_config.json +4 -0
  6. merges.txt +0 -0
  7. model-00001-of-00002.safetensors +3 -0
  8. model-00002-of-00002.safetensors +3 -0
  9. model.safetensors.index.json +0 -0
  10. resolved_train_config.json +83 -0
  11. special_tokens_map.json +31 -0
  12. tokenizer.json +0 -0
  13. tokenizer_config.json +313 -0
  14. train_results.json +8 -0
  15. trainer_state.json +0 -0
  16. training_args.bin +3 -0
  17. training_log.txt +3 -0
  18. vocab.json +0 -0
  19. wandb/debug-internal.log +29 -0
  20. wandb/debug.log +24 -0
  21. wandb/run-20260225_055938-u2vegsv1/files/config.yaml +767 -0
  22. wandb/run-20260225_055938-u2vegsv1/files/output.log +1323 -0
  23. wandb/run-20260225_055938-u2vegsv1/files/requirements.txt +113 -0
  24. wandb/run-20260225_055938-u2vegsv1/files/wandb-metadata.json +144 -0
  25. wandb/run-20260225_055938-u2vegsv1/files/wandb-summary.json +1 -0
  26. wandb/run-20260225_055938-u2vegsv1/logs/debug-core.log +14 -0
  27. wandb/run-20260225_055938-u2vegsv1/logs/debug-internal.log +11 -0
  28. wandb/run-20260225_055938-u2vegsv1/logs/debug.log +24 -0
  29. wandb/run-20260225_055938-u2vegsv1/run-u2vegsv1.wandb +3 -0
  30. wandb/run-20260225_063717-0ub00jhc/files/output.log +398 -0
  31. wandb/run-20260225_063717-0ub00jhc/files/requirements.txt +113 -0
  32. wandb/run-20260225_063717-0ub00jhc/files/wandb-metadata.json +144 -0
  33. wandb/run-20260225_063717-0ub00jhc/logs/debug-core.log +7 -0
  34. wandb/run-20260225_063717-0ub00jhc/logs/debug-internal.log +6 -0
  35. wandb/run-20260225_063717-0ub00jhc/logs/debug.log +22 -0
  36. wandb/run-20260225_063717-0ub00jhc/run-0ub00jhc.wandb +3 -0
  37. wandb/run-20260225_094307-hmhb8ltr/files/output.log +0 -0
  38. wandb/run-20260225_094307-hmhb8ltr/files/requirements.txt +113 -0
  39. wandb/run-20260225_094307-hmhb8ltr/files/wandb-metadata.json +144 -0
  40. wandb/run-20260225_094307-hmhb8ltr/logs/debug-core.log +7 -0
  41. wandb/run-20260225_094307-hmhb8ltr/logs/debug-internal.log +14 -0
  42. wandb/run-20260225_094307-hmhb8ltr/logs/debug.log +22 -0
  43. wandb/run-20260225_094307-hmhb8ltr/run-hmhb8ltr.wandb +3 -0
  44. wandb/run-20260225_111518-pg2w7c3p/files/config.yaml +767 -0
  45. wandb/run-20260225_111518-pg2w7c3p/files/output.log +0 -0
  46. wandb/run-20260225_111518-pg2w7c3p/files/requirements.txt +113 -0
  47. wandb/run-20260225_111518-pg2w7c3p/files/wandb-metadata.json +144 -0
  48. wandb/run-20260225_111518-pg2w7c3p/files/wandb-summary.json +1 -0
  49. wandb/run-20260225_111518-pg2w7c3p/logs/debug-core.log +14 -0
  50. wandb/run-20260225_111518-pg2w7c3p/logs/debug-internal.log +29 -0
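
Note: per the commit message, this upload was produced with huggingface_hub. A minimal sketch of the call that generates a commit like this one; the repo id and local folder below are hypothetical placeholders, not taken from this diff:

    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_folder(
        folder_path="./my_local_checkpoint",   # hypothetical local path
        repo_id="Tsaisplus/my-model",          # hypothetical repo id
        repo_type="model",
        commit_message="Upload folder using huggingface_hub",
    )
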
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ training_log.txt filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20260225_055938-u2vegsv1/run-u2vegsv1.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20260225_063717-0ub00jhc/run-0ub00jhc.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20260225_094307-hmhb8ltr/run-hmhb8ltr.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20260225_111518-pg2w7c3p/run-pg2w7c3p.wandb filter=lfs diff=lfs merge=lfs -text
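
Note: these rules route the new .wandb files and training_log.txt through Git LFS, so the repository itself stores only small pointer stubs (the same format visible below for the safetensors shards). A sketch of telling a pointer stub from real content, assuming the standard LFS pointer header:

    def is_lfs_pointer(path: str) -> bool:
        # Git LFS pointer files begin with this fixed 42-byte spec line.
        with open(path, "rb") as f:
            return f.read(42) == b"version https://git-lfs.github.com/spec/v1"

    print(is_lfs_pointer("training_log.txt"))  # True when only the stub is checked out
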
added_tokens.json ADDED
@@ -0,0 +1,36 @@
+ {
+ "</box>": 151673,
+ "</img>": 151666,
+ "</quad>": 151669,
+ "</ref>": 151671,
+ "</tool_call>": 151658,
+ "<IMG_CONTEXT>": 151667,
+ "<box>": 151672,
+ "<cand>": 151674,
+ "<e_cand>": 151676,
+ "<e_s>": 151675,
+ "<img>": 151665,
+ "<quad>": 151668,
+ "<ref>": 151670,
+ "<tool_call>": 151657,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
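
Note: ids 151643-151664 are the stock Qwen2.5 special tokens, 151665-151673 are InternVL's image/region tags, and 151674-151676 (<cand>, <e_s>, <e_cand>) are the navigation-specific additions. A sketch of verifying a loaded tokenizer against this file; the local checkpoint path is hypothetical:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("./checkpoint", trust_remote_code=True)
    for token, expected_id in [("<img>", 151665), ("<IMG_CONTEXT>", 151667),
                               ("<cand>", 151674), ("<e_cand>", 151676)]:
        assert tok.convert_tokens_to_ids(token) == expected_id, token
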
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 1.0,
+ "train_loss": 0.3191605252492171,
+ "train_runtime": 117088.0105,
+ "train_samples": -1,
+ "train_samples_per_second": 0.318,
+ "train_steps_per_second": 0.079
+ }
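
Note: the reported rates are mutually consistent: at 0.079 steps/s the 117,088 s runtime corresponds to roughly 9,250 optimizer steps, and 0.318 samples/s to about 37,200 samples (train_samples is -1, likely because the packed dataset used here reports no fixed length). A quick check:

    train_runtime = 117088.0105          # seconds, from this file
    print(round(train_runtime * 0.079))  # ~9250 steps
    print(round(train_runtime * 0.318))  # ~37234 samples
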
config.json ADDED
@@ -0,0 +1,237 @@
+ {
+ "_commit_hash": null,
+ "_name_or_path": "../pretrained/InternVL3-2B",
+ "architectures": [
+ "InternVLChatModel"
+ ],
+ "auto_map": {
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
+ },
+ "bev_image_size": 448,
+ "downsample_ratio": 0.5,
+ "dual_text_pos_injection": true,
+ "dynamic_image_size": false,
+ "force_image_size": 448,
+ "hidden_size": 1536,
+ "image_fold": null,
+ "llm_config": {
+ "_attn_implementation_autoset": true,
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
+ "add_cross_attention": false,
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "attn_implementation": "flash_attention_2",
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": 151643,
+ "chunk_size_feed_forward": 0,
+ "cross_attention_hidden_size": null,
+ "decoder_start_token_id": null,
+ "diversity_penalty": 0.0,
+ "do_sample": false,
+ "early_stopping": false,
+ "encoder_no_repeat_ngram_size": 0,
+ "eos_token_id": 151643,
+ "exponential_decay_length_penalty": null,
+ "finetuning_task": null,
+ "forced_bos_token_id": null,
+ "forced_eos_token_id": null,
+ "hidden_act": "silu",
+ "hidden_size": 1536,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 8960,
+ "is_decoder": false,
+ "is_encoder_decoder": false,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "length_penalty": 1.0,
+ "max_length": 20,
+ "max_position_embeddings": 32768,
+ "max_window_layers": 70,
+ "min_length": 0,
+ "model_type": "qwen2",
+ "moe_config": null,
+ "no_repeat_ngram_size": 0,
+ "num_attention_heads": 12,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 2,
+ "num_return_sequences": 1,
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_scores": false,
+ "pad_token_id": null,
+ "prefix": null,
+ "problem_type": null,
+ "pruned_heads": {},
+ "remove_invalid_values": false,
+ "repetition_penalty": 1.0,
+ "return_dict": true,
+ "return_dict_in_generate": false,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "factor": 2.0,
+ "rope_type": "dynamic",
+ "type": "dynamic"
+ },
+ "rope_theta": 1000000.0,
+ "sep_token_id": null,
+ "sliding_window": null,
+ "suppress_tokens": null,
+ "task_specific_params": null,
+ "temperature": 1.0,
+ "tf_legacy_loss": false,
+ "tie_encoder_decoder": false,
+ "tie_word_embeddings": false,
+ "tokenizer_class": null,
+ "top_k": 50,
+ "top_p": 1.0,
+ "torch_dtype": "bfloat16",
+ "torchscript": false,
+ "transformers_version": "4.37.2",
+ "typical_p": 1.0,
+ "use_bfloat16": true,
+ "use_cache": false,
+ "use_sliding_window": false,
+ "vocab_size": 151677
+ },
+ "max_dynamic_patch": 12,
+ "min_dynamic_patch": 1,
+ "model_type": "internvl_chat",
+ "num_image_token_bev": 256,
+ "num_image_token_ego": 32,
+ "pad2square": false,
+ "ps_version": "v2",
+ "select_layer": -1,
+ "system_message": "You are an autonomous navigation agent operating in indoor environments. You receive spatial information through position embeddings injected into visual features and text tokens. Use the BEV map, position embeddings, and semantic information to make navigation decisions. When the target object is detected (<target> marker), navigate directly to it. Otherwise, explore frontiers strategically to find the goal object.",
+ "template": "internvl2_5_nav",
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": null,
+ "use_backbone_lora": 0,
+ "use_llm_lora": 64,
+ "use_pairwise_spatial_encoder": false,
+ "use_position_embeddings": true,
+ "use_thumbnail": true,
+ "vision_config": {
+ "_attn_implementation_autoset": true,
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
+ "add_cross_attention": false,
+ "architectures": [
+ "InternVisionModel"
+ ],
+ "attention_dropout": 0.0,
+ "auto_map": {
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
+ },
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": null,
+ "capacity_factor": 1.2,
+ "chunk_size_feed_forward": 0,
+ "cross_attention_hidden_size": null,
+ "decoder_start_token_id": null,
+ "diversity_penalty": 0.0,
+ "do_sample": false,
+ "drop_path_rate": 0.0,
+ "dropout": 0.0,
+ "early_stopping": false,
+ "encoder_no_repeat_ngram_size": 0,
+ "eos_token_id": null,
+ "eval_capacity_factor": 1.4,
+ "exponential_decay_length_penalty": null,
+ "finetuning_task": null,
+ "forced_bos_token_id": null,
+ "forced_eos_token_id": null,
+ "hidden_act": "gelu",
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1"
+ },
+ "image_size": 448,
+ "initializer_factor": 0.1,
+ "initializer_range": 1e-10,
+ "intermediate_size": 4096,
+ "is_decoder": false,
+ "is_encoder_decoder": false,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "laux_allreduce": "all_nodes",
+ "layer_norm_eps": 1e-06,
+ "length_penalty": 1.0,
+ "max_length": 20,
+ "min_length": 0,
+ "model_type": "intern_vit_6b",
+ "moe_coeff_ratio": 0.5,
+ "moe_intermediate_size": 768,
+ "moe_output_scale": 4.0,
+ "no_repeat_ngram_size": 0,
+ "noisy_gate_policy": "RSample_before",
+ "norm_type": "layer_norm",
+ "num_attention_heads": 16,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "num_channels": 3,
+ "num_experts": 8,
+ "num_hidden_layers": 24,
+ "num_return_sequences": 1,
+ "num_routed_experts": 4,
+ "num_shared_experts": 4,
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_scores": false,
+ "pad_token_id": null,
+ "patch_size": 14,
+ "prefix": null,
+ "problem_type": null,
+ "pruned_heads": {},
+ "qk_normalization": false,
+ "qkv_bias": true,
+ "remove_invalid_values": false,
+ "repetition_penalty": 1.0,
+ "return_dict": true,
+ "return_dict_in_generate": false,
+ "sep_token_id": null,
+ "shared_expert_intermediate_size": 3072,
+ "suppress_tokens": null,
+ "task_specific_params": null,
+ "temperature": 1.0,
+ "tf_legacy_loss": false,
+ "tie_encoder_decoder": false,
+ "tie_word_embeddings": true,
+ "tokenizer_class": null,
+ "top_k": 50,
+ "top_p": 1.0,
+ "torch_dtype": "bfloat16",
+ "torchscript": false,
+ "transformers_version": "4.37.2",
+ "typical_p": 1.0,
+ "use_bfloat16": true,
+ "use_flash_attn": true,
+ "use_moe": false,
+ "use_residual": true,
+ "use_rts": false,
+ "use_weighted_residual": false
+ },
+ "vit_bev_freeze": true,
+ "vit_bev_lora_rank": 64,
+ "vit_bev_use_lora": true,
+ "vit_rgb_freeze": true,
+ "vit_rgb_lora_rank": 16,
+ "vit_rgb_use_lora": true
+ }
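
Note: auto_map points AutoConfig/AutoModel at custom classes shipped with the checkpoint, so loading requires trust_remote_code=True. A minimal sketch, with a hypothetical checkpoint path and assuming the custom configuration/modeling files are present in the repo as auto_map implies:

    from transformers import AutoConfig

    cfg = AutoConfig.from_pretrained("./checkpoint", trust_remote_code=True)
    print(cfg.model_type)             # internvl_chat
    print(cfg.llm_config.model_type)  # qwen2 (1536-hidden, 28-layer backbone)
    print(cfg.use_llm_lora)           # 64: LoRA rank applied on the LLM side
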
generation_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "_from_model_config": true,
+ "transformers_version": "4.37.2"
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0f8f7cace65c206ed677803c7a56351d148073ccde09768ac4e0c8013155650f
+ size 4997765528
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:df61243e02c9244bc214ea44c598360ae3a028126816819deb77d6ca8a3c3152
+ size 8563840
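
Note: each LFS stub pins the payload by SHA-256 oid and byte size, so a fully downloaded shard can be verified against the stub. Sketch, using the values from the pointer above:

    import hashlib, os

    def sha256_of(path: str, chunk: int = 1 << 20) -> str:
        # Stream the file in 1 MiB blocks to avoid loading multi-GB shards into memory.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for block in iter(lambda: f.read(chunk), b""):
                h.update(block)
        return h.hexdigest()

    path = "model-00002-of-00002.safetensors"
    assert os.path.getsize(path) == 8563840
    assert sha256_of(path) == "df61243e02c9244bc214ea44c598360ae3a028126816819deb77d6ca8a3c3152"
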
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
resolved_train_config.json ADDED
@@ -0,0 +1,83 @@
+ {
+ "model_name_or_path": "../pretrained/InternVL3-2B",
+ "freeze_backbone": true,
+ "unfreeze_vit_layers": 0,
+ "use_backbone_lora": 0,
+ "freeze_llm": true,
+ "unfreeze_lm_head": false,
+ "use_llm_lora": 64,
+ "freeze_mlp": false,
+ "vision_select_layer": -1,
+ "grad_checkpoint": true,
+ "gradient_checkpointing": true,
+ "drop_path_rate": 0.0,
+ "ps_version": "v2",
+ "use_fast_tokenizer": true,
+ "max_seq_length": 24576,
+ "force_image_size": 448,
+ "down_sample_ratio": 0.5,
+ "pad2square": true,
+ "conv_style": "internvl2_5_nav",
+ "meta_path": "",
+ "use_data_resampling": false,
+ "dynamic_image_size": false,
+ "use_thumbnail": true,
+ "min_dynamic_patch": 1,
+ "max_dynamic_patch": 12,
+ "normalize_type": "imagenet",
+ "use_packed_ds": true,
+ "num_images_expected": 1000,
+ "max_packed_tokens": 24576,
+ "max_buffer_size": 20,
+ "log_freq": 1000,
+ "strict_mode": false,
+ "replacement": true,
+ "allow_overflow": false,
+ "loss_reduction": "square",
+ "loss_reduction_all_gather": false,
+ "seed": 42,
+ "output_dir": "",
+ "overwrite_output_dir": true,
+ "report_to": "wandb",
+ "save_strategy": "steps",
+ "save_total_limit": 1,
+ "save_steps": 0.5,
+ "logging_steps": 1,
+ "evaluation_strategy": "no",
+ "dataloader_num_workers": 2,
+ "group_by_length": false,
+ "deepspeed": "zero_stage2_config_acc1.json",
+ "remove_unused_columns": false,
+ "do_train": true,
+ "bf16": true,
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "warmup_ratio": 0.03,
+ "lr_scheduler_type": "cosine",
+ "max_grad_norm": 1.0,
+ "num_train_epochs": 1,
+ "per_device_train_batch_size": 1,
+ "gradient_accumulation_steps": 1,
+ "max_steps": 8000,
+ "template_name": "BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY",
+ "use_nav_system_message": true,
+ "use_position_embeddings": true,
+ "use_pairwise_spatial_encoder": false,
+ "use_dual_vit": true,
+ "bev_image_size": 448,
+ "num_image_token_bev": 256,
+ "num_image_token_ego": 32,
+ "vit_bev_freeze": true,
+ "vit_bev_use_lora": true,
+ "vit_bev_lora_rank": 64,
+ "vit_rgb_freeze": true,
+ "vit_rgb_use_lora": true,
+ "vit_rgb_lora_rank": 16,
+ "position_placeholders": [
+ "<s>",
+ "<cand>",
+ "<e_s>",
+ "<e_cand>"
+ ],
+ "dual_text_pos_injection": true
+ }
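
Note: several of these training-time fields are re-exported into config.json above (LoRA ranks, freeze flags, BEV sizes), so a quick cross-check catches a stale export; pad2square is one field that differs between the two files here (true above, false in config.json). Sketch:

    import json

    train_cfg = json.load(open("resolved_train_config.json"))
    model_cfg = json.load(open("config.json"))
    for key in ("use_llm_lora", "vit_bev_lora_rank", "vit_rgb_lora_rank",
                "vit_bev_freeze", "vit_rgb_freeze", "bev_image_size",
                "num_image_token_bev", "num_image_token_ego"):
        assert train_cfg[key] == model_cfg[key], key
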
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,313 @@
+ {
+ "add_bos_token": false,
+ "add_eos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "128245": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151665": {
+ "content": "<img>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151666": {
+ "content": "</img>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151667": {
+ "content": "<IMG_CONTEXT>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151668": {
+ "content": "<quad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151669": {
+ "content": "</quad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151670": {
+ "content": "<ref>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151671": {
+ "content": "</ref>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151672": {
+ "content": "<box>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151673": {
+ "content": "</box>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151674": {
+ "content": "<cand>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151675": {
+ "content": "<e_s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151676": {
+ "content": "<e_cand>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{%- if messages[0]['role'] == 'system' %}{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}{%- else %}{{- '<|im_start|>system\n你是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。<|im_end|>\n' }}{%- endif %}{% for message in messages %}{%- if messages[0]['role'] != 'system' or not loop.first %}{{'<|im_start|>' + message['role'] + '\n'}}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<image>\n' }}{% elif content['type'] == 'video' %}{{ '<video>\n' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{'<|im_end|>\n'}}{%- endif %}{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "model_max_length": 24576,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+ }
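
Note: the chat_template is a Jinja string in the Qwen <|im_start|>/<|im_end|> format; when no system message is supplied it falls back to the default InternVL self-introduction in Chinese (roughly: "You are InternVL (书生·万象), a multimodal large language model jointly developed by Shanghai AI Laboratory, Tsinghua University, and several partner institutions."). Rendering it is one call; the checkpoint path is hypothetical:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("./checkpoint", trust_remote_code=True)
    messages = [
        {"role": "system", "content": "You are an autonomous navigation agent."},
        {"role": "user", "content": "Which frontier should I explore next?"},
    ]
    print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
    # -> <|im_start|>system\n...<|im_end|>\n<|im_start|>user\n...<|im_end|>\n<|im_start|>assistant\n
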
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 1.0,
+ "train_loss": 0.3191605252492171,
+ "train_runtime": 117088.0105,
+ "train_samples": -1,
+ "train_samples_per_second": 0.318,
+ "train_steps_per_second": 0.079
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7589102ab8fa3bd0e675ac7553bdd0895800c35df3fa2763096b83dac317d48
+ size 6328
training_log.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8060c9bf6ec381f46c7e1d689880c3c357d7891835ec14e0468f2ba7af26de61
+ size 15676128
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
@@ -0,0 +1,29 @@
+ {"time":"2026-02-25T11:15:18.266363836+08:00","level":"INFO","msg":"stream: starting","core version":"0.22.3"}
+ {"time":"2026-02-25T11:15:23.025454392+08:00","level":"INFO","msg":"stream: created new stream","id":"pg2w7c3p"}
+ {"time":"2026-02-25T11:15:23.025989355+08:00","level":"INFO","msg":"handler: started","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-25T11:15:23.029061332+08:00","level":"INFO","msg":"stream: started","id":"pg2w7c3p"}
+ {"time":"2026-02-25T11:15:23.029075662+08:00","level":"INFO","msg":"sender: started","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-25T11:15:23.02907461+08:00","level":"INFO","msg":"writer: started","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-25T15:48:54.519398391+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/tsaisplus-nanyang-technological-university-singapore/prompt_revision/pg2w7c3p/file_stream\": EOF"}
+ {"time":"2026-02-25T21:07:59.713263866+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": unexpected EOF"}
+ {"time":"2026-02-26T01:46:00.156791646+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":64931}
+ {"time":"2026-02-26T01:46:01.26570749+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T01:46:04.155088853+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T02:27:44.815648016+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":68065}
+ {"time":"2026-02-26T02:27:46.348757133+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T02:27:49.049539083+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T03:22:31.469120504+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T03:22:33.406774848+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":72171}
+ {"time":"2026-02-26T03:22:34.088783654+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T08:26:46.733939909+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T08:26:53.80398635+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":95045}
+ {"time":"2026-02-26T08:26:57.006489641+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T09:11:42.493210656+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/tsaisplus-nanyang-technological-university-singapore/prompt_revision/pg2w7c3p/file_stream\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T12:21:30.540150918+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":112659}
+ {"time":"2026-02-26T12:21:31.920394554+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T12:21:34.277988733+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T19:46:56.115836343+08:00","level":"INFO","msg":"stream: closing","id":"pg2w7c3p"}
+ {"time":"2026-02-26T19:47:01.871500678+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2026-02-26T19:47:02.310370252+08:00","level":"INFO","msg":"handler: closed","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-26T19:47:02.312032251+08:00","level":"INFO","msg":"sender: closed","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-26T19:47:02.312485027+08:00","level":"INFO","msg":"stream: closed","id":"pg2w7c3p"}
wandb/debug.log ADDED
@@ -0,0 +1,24 @@
+ 2026-02-25 11:15:18,028 INFO MainThread:121696 [wandb_setup.py:_flush():81] Current SDK version is 0.22.3
+ 2026-02-25 11:15:18,030 INFO MainThread:121696 [wandb_setup.py:_flush():81] Configure stats pid to 121696
+ 2026-02-25 11:15:18,031 INFO MainThread:121696 [wandb_setup.py:_flush():81] Loading settings from /mnt/petrelfs/wangmaonan/.config/wandb/settings
+ 2026-02-25 11:15:18,031 INFO MainThread:121696 [wandb_setup.py:_flush():81] Loading settings from /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/wandb/settings
+ 2026-02-25 11:15:18,032 INFO MainThread:121696 [wandb_setup.py:_flush():81] Loading settings from environment variables
+ 2026-02-25 11:15:18,032 INFO MainThread:121696 [wandb_init.py:setup_run_log_directory():706] Logging user logs to /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/wandb/run-20260225_111518-pg2w7c3p/logs/debug.log
+ 2026-02-25 11:15:18,033 INFO MainThread:121696 [wandb_init.py:setup_run_log_directory():707] Logging internal logs to /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/wandb/run-20260225_111518-pg2w7c3p/logs/debug-internal.log
+ 2026-02-25 11:15:18,034 INFO MainThread:121696 [wandb_init.py:init():833] calling init triggers
+ 2026-02-25 11:15:18,034 INFO MainThread:121696 [wandb_init.py:init():838] wandb.init called with sweep_config: {}
+ config: {'_wandb': {}}
+ 2026-02-25 11:15:18,035 INFO MainThread:121696 [wandb_init.py:init():881] starting backend
+ 2026-02-25 11:15:18,255 INFO MainThread:121696 [wandb_init.py:init():884] sending inform_init request
+ 2026-02-25 11:15:18,261 INFO MainThread:121696 [wandb_init.py:init():892] backend started and connected
+ 2026-02-25 11:15:18,263 INFO MainThread:121696 [wandb_init.py:init():962] updated telemetry
+ 2026-02-25 11:15:18,290 INFO MainThread:121696 [wandb_init.py:init():986] communicating run to backend with 90.0 second timeout
+ 2026-02-25 11:15:28,713 INFO MainThread:121696 [wandb_init.py:init():1033] starting run threads in backend
+ 2026-02-25 11:15:28,992 INFO MainThread:121696 [wandb_run.py:_console_start():2506] atexit reg
+ 2026-02-25 11:15:28,993 INFO MainThread:121696 [wandb_run.py:_redirect():2354] redirect: wrap_raw
+ 2026-02-25 11:15:28,993 INFO MainThread:121696 [wandb_run.py:_redirect():2423] Wrapping output streams.
+ 2026-02-25 11:15:28,994 INFO MainThread:121696 [wandb_run.py:_redirect():2446] Redirects installed.
+ 2026-02-25 11:15:28,999 INFO MainThread:121696 [wandb_init.py:init():1073] run started, returning control to user process
+ 2026-02-25 11:15:29,002 INFO MainThread:121696 [wandb_run.py:_config_callback():1390] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['InternVLChatModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '../pretrained/InternVL3-2B', '_commit_hash': None, '_attn_implementation_internal': None, 'transformers_version': None, 'auto_map': {'AutoConfig': 'configuration_internvl_chat.InternVLChatConfig', 'AutoModel': 'modeling_internvl_chat.InternVLChatModel', 'AutoModelForCausalLM': 'modeling_internvl_chat.InternVLChatModel'}, 'hidden_size': 1536, 'image_fold': None, 'model_type': 'internvl_chat', 'system_message': 'You are an autonomous navigation agent operating in indoor environments. You receive spatial information through position embeddings injected into visual features and text tokens. Use the BEV map, position embeddings, and semantic information to make navigation decisions. When the target object is detected (<target> marker), navigate directly to it. 
Otherwise, explore frontiers strategically to find the goal object.', 'vision_config': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': True, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['InternVisionModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'OpenGVLab/InternViT-6B-448px-V1-5', 'transformers_version': '4.37.2', '_attn_implementation_autoset': True, 'auto_map': {'AutoConfig': 'configuration_intern_vit.InternVisionConfig', 'AutoModel': 'modeling_intern_vit.InternVisionModel'}, 'capacity_factor': 1.2, 'eval_capacity_factor': 1.4, 'laux_allreduce': 'all_nodes', 'model_type': 'intern_vit_6b', 'moe_coeff_ratio': 0.5, 'moe_intermediate_size': 768, 'moe_output_scale': 4.0, 'noisy_gate_policy': 'RSample_before', 'num_experts': 8, 'num_routed_experts': 4, 'num_shared_experts': 4, 'shared_expert_intermediate_size': 3072, 'use_moe': False, 'use_residual': True, 'use_rts': False, 'use_weighted_residual': False, 'hidden_size': 1024, 'intermediate_size': 4096, 'dropout': 0.0, 'drop_path_rate': 0.0, 'num_hidden_layers': 24, 'num_attention_heads': 16, 'num_channels': 3, 'patch_size': 14, 'image_size': 448, 'initializer_range': 1e-10, 'initializer_factor': 0.1, 'attention_dropout': 0.0, 'layer_norm_eps': 1e-06, 'hidden_act': 'gelu', 'norm_type': 'layer_norm', 'qkv_bias': True, 'qk_normalization': False, 'use_flash_attn': True}, 'llm_config': {'vocab_size': 151677, 'max_position_embeddings': 32768, 'hidden_size': 1536, 'intermediate_size': 8960, 'num_hidden_layers': 28, 'num_attention_heads': 12, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 70, 'num_key_value_heads': 2, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'rope_theta': 1000000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': True, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 
'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 151643, 'pad_token_id': None, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './pretrained/Qwen2.5-32B-Instruct', 'transformers_version': '4.37.2', '_attn_implementation_autoset': True, 'model_type': 'qwen2', 'moe_config': None, 'rope_scaling': {'factor': 2.0, 'rope_type': 'dynamic', 'type': 'dynamic'}, 'attn_implementation': 'flash_attention_2'}, 'use_backbone_lora': 0, 'use_llm_lora': 64, 'pad2square': False, 'select_layer': -1, 'force_image_size': 448, 'downsample_ratio': 0.5, 'template': 'internvl2_5_nav', 'dynamic_image_size': False, 'use_thumbnail': True, 'ps_version': 'v2', 'min_dynamic_patch': 1, 'max_dynamic_patch': 12, 'num_image_token_bev': 256, 'num_image_token_ego': 32, 'use_pairwise_spatial_encoder': False, 'use_position_embeddings': True, 'dual_text_pos_injection': True, 'bev_image_size': 448, 'vit_bev_freeze': True, 'vit_bev_use_lora': True, 'vit_bev_lora_rank': 64, 'vit_rgb_freeze': True, 'vit_rgb_use_lora': True, 'vit_rgb_lora_rank': 16, 'output_dir': '/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': 9300, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'runs/Feb25_11-14-26_SH-IDC1-10-140-37-90', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 2, 'past_index': -1, 'run_name': 
'a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY_steps9300_gpus4_acc1', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': 'zero_stage2_config_acc1.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+ 2026-02-26 19:46:56,115 INFO wandb-AsyncioManager-main:121696 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-02-26 19:46:56,116 INFO wandb-AsyncioManager-main:121696 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
wandb/run-20260225_055938-u2vegsv1/files/config.yaml ADDED
@@ -0,0 +1,767 @@
+ _attn_implementation_internal:
+ value: null
+ _commit_hash:
+ value: null
+ _name_or_path:
+ value: ../pretrained/InternVL3-2B
+ _wandb:
+ value:
+ cli_version: 0.22.3
+ e:
+ lcgegjlerikh15wodksbpfheadlvwekl:
+ args:
+ - /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/resolved_train_config.json
+ codePath: InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py
+ codePathLocal: internvl_cleaned/train/internvl_chat_finetune.py
+ cpu_count: 64
+ cpu_count_logical: 128
+ cudaVersion: "12.2"
+ disk:
+ /:
+ total: "524945911808"
+ used: "39460401152"
+ email: caiy0039@e.ntu.edu.sg
+ executable: /mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/bin/python3.10
+ git:
+ commit: f7d6fbe6a8031d29a95c2f3be79e99f96670a12e
+ remote: git@github.com:Yuxin916/CL_CoTNav.git
+ gpu: NVIDIA A100-SXM4-80GB
+ gpu_count: 8
+ gpu_nvidia:
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-9e78ad4b-b304-a199-38f4-24b3acd9b531
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-19c9429b-5ecf-0b76-7d81-03c36a449f32
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-57520d78-d04a-3028-bd40-5ebd78e123e6
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-0d94270a-8ad3-b3a6-4acd-145fccc36d85
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-b9b6fe80-e37a-b622-0deb-27ef46a965ff
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-7b870038-dbe6-4039-b6bf-f90517f43f6c
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-e3065d01-9539-64c9-417f-91273b521051
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-28ce6eed-85b8-155a-5a5d-412dfb1dd1c0
+ host: SH-IDC1-10-140-37-45
+ memory:
+ total: "1081627828224"
+ os: Linux-3.10.0-957.el7.x86_64-x86_64-with-glibc2.17
+ program: /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py
+ python: CPython 3.10.18
+ root: /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY
+ slurm:
+ cluster_name: cluster_sproject3
+ conf: /etc/slurm/slurm.conf
+ cpus_on_node: "128"
+ cpus_per_task: "8"
+ distribution: cyclic
+ gtids: "0"
+ job_account: research
+ job_cpus_per_node: "128"
+ job_cpus_per_node_pack_group_0: "128"
+ job_gid: "200000139"
+ job_gpus: 0,1,2,3,4,5,6,7
+ job_id: "7464467"
+ job_name: vlm_ft
+ job_nodelist: SH-IDC1-10-140-37-45
+ job_num_nodes: "1"
+ job_partition: interntmp
+ job_qos: normal
+ job_uid: "200000139"
+ job_user: wangmaonan
+ jobid: "7464467"
+ launch_node_ipaddr: 10.140.37.45
+ localid: "0"
+ mem_per_node: "49152"
+ nnodes: "1"
+ node_aliases: (null)
+ nodeid: "0"
+ nodelist: SH-IDC1-10-140-37-45
+ nprocs: "1"
+ ntasks: "1"
+ ntasks_per_node: "1"
+ prio_process: "0"
+ procid: "0"
+ srun_comm_host: 10.140.37.45
+ srun_comm_port: "44377"
+ step_gpus: 0,1,2,3
+ step_id: "0"
+ step_launcher_port: "44377"
+ step_nodelist: SH-IDC1-10-140-37-45
+ step_num_nodes: "1"
+ step_num_tasks: "1"
+ step_tasks_per_node: "1"
+ stepid: "0"
+ submit_dir: /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav
+ submit_host: SH-IDC1-10-140-37-45
+ task_pid: "151799"
+ tasks_per_node: "1"
+ topology_addr: SH-IDC1-10-140-37-45
+ topology_addr_pattern: node
+ umask: "0002"
+ working_cluster: cluster_sproject3:SH-IDC1-10-140-37-161:6817:9216:109
+ startedAt: "2026-02-24T21:59:38.884470Z"
+ writerId: lcgegjlerikh15wodksbpfheadlvwekl
+ m:
+ - "1": train/global_step
+ "6":
+ - 3
+ "7": []
+ - "2": '*'
+ "5": 1
+ "6":
+ - 1
+ "7": []
+ python_version: 3.10.18
+ t:
+ "1":
+ - 1
+ - 11
+ - 41
+ - 49
+ - 51
+ - 63
+ - 71
+ - 98
+ - 105
+ "2":
+ - 1
+ - 11
+ - 41
+ - 49
+ - 51
+ - 63
+ - 71
+ - 98
+ - 105
+ "3":
+ - 7
+ - 13
+ - 66
+ "4": 3.10.18
+ "5": 0.22.3
+ "6": 4.37.2
+ "9":
+ "1": transformers_trainer
+ "12": 0.22.3
+ "13": linux-x86_64
+ adafactor:
+ value: false
+ adam_beta1:
+ value: 0.9
+ adam_beta2:
+ value: 0.999
+ adam_epsilon:
+ value: 1e-08
+ add_cross_attention:
+ value: false
+ architectures:
+ value:
+ - InternVLChatModel
+ auto_find_batch_size:
+ value: false
+ auto_map:
+ value:
+ AutoConfig: configuration_internvl_chat.InternVLChatConfig
+ AutoModel: modeling_internvl_chat.InternVLChatModel
+ AutoModelForCausalLM: modeling_internvl_chat.InternVLChatModel
+ bad_words_ids:
+ value: null
+ begin_suppress_tokens:
+ value: null
+ bev_image_size:
+ value: 448
+ bf16:
+ value: true
+ bf16_full_eval:
+ value: false
+ bos_token_id:
+ value: null
+ chunk_size_feed_forward:
+ value: 0
+ cross_attention_hidden_size:
+ value: null
+ data_seed:
+ value: null
+ dataloader_drop_last:
+ value: false
+ dataloader_num_workers:
+ value: 12
+ dataloader_persistent_workers:
+ value: false
+ dataloader_pin_memory:
+ value: true
+ ddp_backend:
+ value: null
+ ddp_broadcast_buffers:
+ value: null
+ ddp_bucket_cap_mb:
+ value: null
+ ddp_find_unused_parameters:
+ value: null
+ ddp_timeout:
+ value: 1800
+ debug:
+ value: []
+ decoder_start_token_id:
+ value: null
+ deepspeed:
+ value: zero_stage2_config_acc1.json
+ disable_tqdm:
+ value: false
+ dispatch_batches:
+ value: null
+ diversity_penalty:
+ value: 0
+ do_eval:
+ value: false
+ do_predict:
+ value: false
+ do_sample:
+ value: false
+ do_train:
+ value: true
+ downsample_ratio:
+ value: 0.5
+ dual_text_pos_injection:
+ value: true
+ dynamic_image_size:
+ value: false
+ early_stopping:
+ value: false
+ encoder_no_repeat_ngram_size:
+ value: 0
+ eos_token_id:
+ value: null
+ eval_accumulation_steps:
+ value: null
+ eval_delay:
+ value: 0
+ eval_steps:
+ value: null
+ evaluation_strategy:
+ value: "no"
+ exponential_decay_length_penalty:
+ value: null
+ finetuning_task:
+ value: null
+ force_image_size:
+ value: 448
+ forced_bos_token_id:
+ value: null
+ forced_eos_token_id:
+ value: null
+ fp16:
+ value: false
+ fp16_backend:
+ value: auto
+ fp16_full_eval:
+ value: false
+ fp16_opt_level:
+ value: O1
+ fsdp:
+ value: []
+ fsdp_config:
+ value:
+ min_num_params: 0
+ xla: false
+ xla_fsdp_grad_ckpt: false
+ fsdp_min_num_params:
+ value: 0
+ fsdp_transformer_layer_cls_to_wrap:
+ value: null
+ full_determinism:
+ value: false
+ gradient_accumulation_steps:
+ value: 1
+ gradient_checkpointing:
+ value: true
+ gradient_checkpointing_kwargs:
+ value: null
+ greater_is_better:
+ value: null
+ group_by_length:
+ value: false
+ half_precision_backend:
+ value: auto
+ hidden_size:
+ value: 1536
+ hub_always_push:
+ value: false
+ hub_model_id:
+ value: null
+ hub_private_repo:
+ value: false
+ hub_strategy:
+ value: every_save
+ hub_token:
+ value: <HUB_TOKEN>
+ id2label:
+ value:
+ "0": LABEL_0
+ "1": LABEL_1
+ ignore_data_skip:
+ value: false
+ image_fold:
+ value: null
+ include_inputs_for_metrics:
+ value: false
+ include_num_input_tokens_seen:
+ value: false
+ include_tokens_per_second:
+ value: false
+ is_decoder:
+ value: false
+ is_encoder_decoder:
+ value: false
+ jit_mode_eval:
+ value: false
+ label_names:
+ value: null
+ label_smoothing_factor:
+ value: 0
+ label2id:
+ value:
+ LABEL_0: 0
+ LABEL_1: 1
+ learning_rate:
+ value: 0.0001
+ length_column_name:
+ value: length
+ length_penalty:
+ value: 1
+ llm_config:
+ value:
+ _attn_implementation_autoset: true
+ _name_or_path: ./pretrained/Qwen2.5-32B-Instruct
+ add_cross_attention: false
+ architectures:
+ - Qwen2ForCausalLM
+ attention_dropout: 0
+ attn_implementation: flash_attention_2
+ bad_words_ids: null
+ begin_suppress_tokens: null
+ bos_token_id: 151643
+ chunk_size_feed_forward: 0
+ cross_attention_hidden_size: null
+ decoder_start_token_id: null
+ diversity_penalty: 0
+ do_sample: false
+ early_stopping: false
+ encoder_no_repeat_ngram_size: 0
+ eos_token_id: 151643
+ exponential_decay_length_penalty: null
+ finetuning_task: null
+ forced_bos_token_id: null
+ forced_eos_token_id: null
+ hidden_act: silu
+ hidden_size: 1536
+ id2label:
+ "0": LABEL_0
+ "1": LABEL_1
+ initializer_range: 0.02
+ intermediate_size: 8960
+ is_decoder: false
+ is_encoder_decoder: false
+ label2id:
+ LABEL_0: 0
+ LABEL_1: 1
+ length_penalty: 1
+ max_length: 20
+ max_position_embeddings: 32768
+ max_window_layers: 70
+ min_length: 0
+ model_type: qwen2
+ moe_config: null
+ no_repeat_ngram_size: 0
+ num_attention_heads: 12
+ num_beam_groups: 1
+ num_beams: 1
+ num_hidden_layers: 28
+ num_key_value_heads: 2
+ num_return_sequences: 1
+ output_attentions: false
+ output_hidden_states: false
+ output_scores: false
+ pad_token_id: null
+ prefix: null
+ problem_type: null
+ remove_invalid_values: false
+ repetition_penalty: 1
+ return_dict: true
+ return_dict_in_generate: false
+ rms_norm_eps: 1e-06
+ rope_scaling:
+ factor: 2
+ rope_type: dynamic
+ type: dynamic
+ rope_theta: 1e+06
+ sep_token_id: null
+ sliding_window: null
+ suppress_tokens: null
+ task_specific_params: null
+ temperature: 1
+ tf_legacy_loss: false
+ tie_encoder_decoder: false
+ tie_word_embeddings: false
+ tokenizer_class: null
+ top_k: 50
+ top_p: 1
+ torch_dtype: bfloat16
+ torchscript: false
+ transformers_version: 4.37.2
+ typical_p: 1
+ use_bfloat16: true
+ use_cache: false
+ use_sliding_window: false
+ vocab_size: 151677
+ load_best_model_at_end:
+ value: false
+ local_rank:
+ value: 0
+ log_level:
+ value: passive
+ log_level_replica:
+ value: warning
+ log_on_each_node:
+ value: true
+ logging_dir:
+ value: runs/Feb25_05-58-51_SH-IDC1-10-140-37-45
+ logging_first_step:
+ value: false
+ logging_nan_inf_filter:
+ value: true
+ logging_steps:
+ value: 1
+ logging_strategy:
+ value: steps
+ lr_scheduler_type:
+ value: cosine
+ max_dynamic_patch:
+ value: 12
+ max_grad_norm:
+ value: 1
+ max_length:
+ value: 20
+ max_steps:
+ value: 11000
+ metric_for_best_model:
+ value: null
+ min_dynamic_patch:
+ value: 1
+ min_length:
+ value: 0
+ model_type:
+ value: internvl_chat
+ mp_parameters:
+ value: ""
+ neftune_noise_alpha:
+ value: null
+ no_cuda:
+ value: false
+ no_repeat_ngram_size:
+ value: 0
+ num_beam_groups:
+ value: 1
+ num_beams:
+ value: 1
+ num_image_token_bev:
+ value: 256
+ num_image_token_ego:
+ value: 32
+ num_return_sequences:
+ value: 1
+ num_train_epochs:
+ value: 1
+ optim:
+ value: adamw_torch
+ optim_args:
+ value: null
+ output_attentions:
+ value: false
+ output_dir:
+ value: /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY
+ output_hidden_states:
+ value: false
+ output_scores:
+ value: false
+ overwrite_output_dir:
+ value: true
+ pad_token_id:
+ value: null
+ pad2square:
+ value: false
+ past_index:
+ value: -1
+ per_device_eval_batch_size:
+ value: 8
+ per_device_train_batch_size:
+ value: 1
+ per_gpu_eval_batch_size:
+ value: null
+ per_gpu_train_batch_size:
+ value: null
+ prediction_loss_only:
+ value: false
+ prefix:
+ value: null
+ problem_type:
+ value: null
+ ps_version:
+ value: v2
+ push_to_hub:
+ value: false
540
+ push_to_hub_model_id:
541
+ value: null
542
+ push_to_hub_organization:
543
+ value: null
544
+ push_to_hub_token:
545
+ value: <PUSH_TO_HUB_TOKEN>
546
+ ray_scope:
547
+ value: last
548
+ remove_invalid_values:
549
+ value: false
550
+ remove_unused_columns:
551
+ value: false
552
+ repetition_penalty:
553
+ value: 1
554
+ report_to:
555
+ value:
556
+ - wandb
557
+ resume_from_checkpoint:
558
+ value: null
559
+ return_dict:
560
+ value: true
561
+ return_dict_in_generate:
562
+ value: false
563
+ run_name:
564
+ value: a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY_steps11000_gpus4_acc1
565
+ save_on_each_node:
566
+ value: false
567
+ save_only_model:
568
+ value: false
569
+ save_safetensors:
570
+ value: true
571
+ save_steps:
572
+ value: 0.5
573
+ save_strategy:
574
+ value: steps
575
+ save_total_limit:
576
+ value: 2
577
+ seed:
578
+ value: 42
579
+ select_layer:
580
+ value: -1
581
+ sep_token_id:
582
+ value: null
583
+ skip_memory_metrics:
584
+ value: true
585
+ split_batches:
586
+ value: false
587
+ suppress_tokens:
588
+ value: null
589
+ system_message:
590
+ value: You are an autonomous navigation agent operating in indoor environments. You receive spatial information through position embeddings injected into visual features and text tokens. Use the BEV map, position embeddings, and semantic information to make navigation decisions. When the target object is detected (<target> marker), navigate directly to it. Otherwise, explore frontiers strategically to find the goal object.
591
+ task_specific_params:
592
+ value: null
593
+ temperature:
594
+ value: 1
595
+ template:
596
+ value: internvl2_5_nav
597
+ tf_legacy_loss:
598
+ value: false
599
+ tf32:
600
+ value: null
601
+ tie_encoder_decoder:
602
+ value: false
603
+ tie_word_embeddings:
604
+ value: false
605
+ tokenizer_class:
606
+ value: null
607
+ top_k:
608
+ value: 50
609
+ top_p:
610
+ value: 1
611
+ torch_compile:
612
+ value: false
613
+ torch_compile_backend:
614
+ value: null
615
+ torch_compile_mode:
616
+ value: null
617
+ torch_dtype:
618
+ value: torch.bfloat16
619
+ torchdynamo:
620
+ value: null
621
+ torchscript:
622
+ value: false
623
+ tpu_metrics_debug:
624
+ value: false
625
+ tpu_num_cores:
626
+ value: null
627
+ transformers_version:
628
+ value: null
629
+ typical_p:
630
+ value: 1
631
+ use_backbone_lora:
632
+ value: 0
633
+ use_bfloat16:
634
+ value: false
635
+ use_cpu:
636
+ value: false
637
+ use_ipex:
638
+ value: false
639
+ use_legacy_prediction_loop:
640
+ value: false
641
+ use_llm_lora:
642
+ value: 64
643
+ use_mps_device:
644
+ value: false
645
+ use_pairwise_spatial_encoder:
646
+ value: false
647
+ use_position_embeddings:
648
+ value: true
649
+ use_thumbnail:
650
+ value: true
651
+ vision_config:
652
+ value:
653
+ _attn_implementation_autoset: true
654
+ _name_or_path: OpenGVLab/InternViT-6B-448px-V1-5
655
+ add_cross_attention: false
656
+ architectures:
657
+ - InternVisionModel
658
+ attention_dropout: 0
659
+ auto_map:
660
+ AutoConfig: configuration_intern_vit.InternVisionConfig
661
+ AutoModel: modeling_intern_vit.InternVisionModel
662
+ bad_words_ids: null
663
+ begin_suppress_tokens: null
664
+ bos_token_id: null
665
+ capacity_factor: 1.2
666
+ chunk_size_feed_forward: 0
667
+ cross_attention_hidden_size: null
668
+ decoder_start_token_id: null
669
+ diversity_penalty: 0
670
+ do_sample: false
671
+ drop_path_rate: 0
672
+ dropout: 0
673
+ early_stopping: false
674
+ encoder_no_repeat_ngram_size: 0
675
+ eos_token_id: null
676
+ eval_capacity_factor: 1.4
677
+ exponential_decay_length_penalty: null
678
+ finetuning_task: null
679
+ forced_bos_token_id: null
680
+ forced_eos_token_id: null
681
+ hidden_act: gelu
682
+ hidden_size: 1024
683
+ id2label:
684
+ "0": LABEL_0
685
+ "1": LABEL_1
686
+ image_size: 448
687
+ initializer_factor: 0.1
688
+ initializer_range: 1e-10
689
+ intermediate_size: 4096
690
+ is_decoder: false
691
+ is_encoder_decoder: false
692
+ label2id:
693
+ LABEL_0: 0
694
+ LABEL_1: 1
695
+ laux_allreduce: all_nodes
696
+ layer_norm_eps: 1e-06
697
+ length_penalty: 1
698
+ max_length: 20
699
+ min_length: 0
700
+ model_type: intern_vit_6b
701
+ moe_coeff_ratio: 0.5
702
+ moe_intermediate_size: 768
703
+ moe_output_scale: 4
704
+ no_repeat_ngram_size: 0
705
+ noisy_gate_policy: RSample_before
706
+ norm_type: layer_norm
707
+ num_attention_heads: 16
708
+ num_beam_groups: 1
709
+ num_beams: 1
710
+ num_channels: 3
711
+ num_experts: 8
712
+ num_hidden_layers: 24
713
+ num_return_sequences: 1
714
+ num_routed_experts: 4
715
+ num_shared_experts: 4
716
+ output_attentions: false
717
+ output_hidden_states: false
718
+ output_scores: false
719
+ pad_token_id: null
720
+ patch_size: 14
721
+ prefix: null
722
+ problem_type: null
723
+ qk_normalization: false
724
+ qkv_bias: true
725
+ remove_invalid_values: false
726
+ repetition_penalty: 1
727
+ return_dict: true
728
+ return_dict_in_generate: false
729
+ sep_token_id: null
730
+ shared_expert_intermediate_size: 3072
731
+ suppress_tokens: null
732
+ task_specific_params: null
733
+ temperature: 1
734
+ tf_legacy_loss: false
735
+ tie_encoder_decoder: false
736
+ tie_word_embeddings: true
737
+ tokenizer_class: null
738
+ top_k: 50
739
+ top_p: 1
740
+ torch_dtype: bfloat16
741
+ torchscript: false
742
+ transformers_version: 4.37.2
743
+ typical_p: 1
744
+ use_bfloat16: true
745
+ use_flash_attn: true
746
+ use_moe: false
747
+ use_residual: true
748
+ use_rts: false
749
+ use_weighted_residual: false
750
+ vit_bev_freeze:
751
+ value: true
752
+ vit_bev_lora_rank:
753
+ value: 64
754
+ vit_bev_use_lora:
755
+ value: true
756
+ vit_rgb_freeze:
757
+ value: true
758
+ vit_rgb_lora_rank:
759
+ value: 16
760
+ vit_rgb_use_lora:
761
+ value: true
762
+ warmup_ratio:
763
+ value: 0.03
764
+ warmup_steps:
765
+ value: 0
766
+ weight_decay:
767
+ value: 0.01
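The block above closes the wandb `config.yaml` dump for this run. The key derived training quantities follow directly from the values recorded there; the sketch below is a minimal illustration (not part of this commit), assuming PyYAML is available and that the GPU count of 4 is read off the `run_name` suffix `gpus4_acc1`, since the config stores no world-size key:

```python
# Minimal sketch: read the wandb config dump back and derive the
# effective batch size and warmup length. Assumes PyYAML; the GPU
# count is an assumption inferred by eye from run_name ("...gpus4_acc1").
import yaml

with open("wandb/run-20260225_055938-u2vegsv1/files/config.yaml") as f:
    cfg = yaml.safe_load(f)

def val(key):
    # wandb wraps every top-level entry as {key: {"value": ...}}.
    return cfg[key]["value"]

num_gpus = 4  # assumption from run_name, not a config key
effective_batch = (val("per_device_train_batch_size")
                   * val("gradient_accumulation_steps") * num_gpus)  # 1 * 1 * 4 = 4
warmup_steps = val("warmup_steps") or int(val("warmup_ratio") * val("max_steps"))

print(effective_batch)  # 4 samples per optimizer step
print(warmup_steps)     # 330: warmup_steps is 0, so warmup_ratio * max_steps applies
```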
wandb/run-20260225_055938-u2vegsv1/files/output.log ADDED
@@ -0,0 +1,1323 @@
+ 0%| | 0/11000 [00:00<?, ?it/s]
+ 02/25/2026 06:17:33 - INFO - internvl_cleaned.model.internvl_chat.modeling_internvl_chat - [Text Position Embedding Verification] Sample 1:
+ <s> at token 437 sub_idx=0
+ <s> at token 1921 sub_idx=1
+ <s> at token 3160 sub_idx=2
+ <s> at token 4545 sub_idx=3
+ <s> at token 5493 sub_idx=4
+ <s> at token 6587 sub_idx=5
+ <s> at token 7484 sub_idx=6
+ <s> at token 8141 sub_idx=7
+ <s> at token 8945 sub_idx=8
+ <s> at token 9891 sub_idx=9
+ <s> at token 10835 sub_idx=10
+ <s> at token 12123 sub_idx=11
+ <s> at token 13020 sub_idx=12
+ <s> at token 13821 sub_idx=13
+ <s> at token 14575 sub_idx=14
+ <s> at token 15767 sub_idx=15
+ <s> at token 16617 sub_idx=16
+ <s> at token 17905 sub_idx=17
+ <s> at token 19243 sub_idx=18
+ <s> at token 20482 sub_idx=19
+ <s> at token 21331 sub_idx=20
+ <s> at token 22180 sub_idx=21
+ <s> at token 23030 sub_idx=22
+ <s> at token 23973 sub_idx=23
+ <e_s> at token 439 sub_idx=0
+ <e_s> at token 1923 sub_idx=1
+ <e_s> at token 3162 sub_idx=2
+ <e_s> at token 4547 sub_idx=3
+ <e_s> at token 5495 sub_idx=4
+ <e_s> at token 6589 sub_idx=5
+ <e_s> at token 7486 sub_idx=6
+ <e_s> at token 8143 sub_idx=7
+ <e_s> at token 8947 sub_idx=8
+ <e_s> at token 9893 sub_idx=9
+ <e_s> at token 10837 sub_idx=10
+ <e_s> at token 12125 sub_idx=11
+ <e_s> at token 13022 sub_idx=12
+ <e_s> at token 13823 sub_idx=13
+ <e_s> at token 14577 sub_idx=14
+ <e_s> at token 15769 sub_idx=15
+ <e_s> at token 16619 sub_idx=16
+ <e_s> at token 17907 sub_idx=17
+ <e_s> at token 19245 sub_idx=18
+ <e_s> at token 20484 sub_idx=19
+ <e_s> at token 21333 sub_idx=20
+ <e_s> at token 22182 sub_idx=21
+ <e_s> at token 23032 sub_idx=22
+ <e_s> at token 23975 sub_idx=23
+ <cand> 0 at token 444 sub_idx=0 pixel=[320.0,270.0]
+ <cand> 1 at token 492 sub_idx=0 pixel=[185.0,213.0]
+ <cand> 2 at token 540 sub_idx=0 pixel=[319.0,247.0]
+ <cand> 3 at token 588 sub_idx=0 pixel=[256.0,210.0]
+ <cand> 4 at token 636 sub_idx=0 pixel=[212.0,138.0]
+ <cand> 5 at token 684 sub_idx=0 pixel=[245.0,121.0]
+ <cand> 6 at token 732 sub_idx=0 pixel=[272.0,370.0]
+ <cand> 7 at token 780 sub_idx=0 pixel=[307.0,160.0]
+ <cand> 8 at token 828 sub_idx=0 pixel=[242.0,314.0]
+ <cand> 9 at token 876 sub_idx=0 pixel=[181.0,243.0]
+ <cand> 10 at token 924 sub_idx=0 pixel=[235.0,183.0]
+ <cand> 11 at token 973 sub_idx=0 pixel=[275.0,174.0]
+ <cand> 12 at token 1022 sub_idx=0 pixel=[240.0,152.0]
+ <cand> 13 at token 1071 sub_idx=0 pixel=[216.0,214.0]
+ <cand> 14 at token 1118 sub_idx=0 pixel=[315.0,117.0]
+ <cand> 15 at token 1167 sub_idx=0 pixel=[226.0,111.0]
+ <cand> 16 at token 1216 sub_idx=0 pixel=[189.0,277.0]
+ <cand> 17 at token 1265 sub_idx=0 pixel=[278.0,161.0]
+ <cand> 18 at token 1314 sub_idx=0 pixel=[203.0,286.0]
+ <cand> 19 at token 1363 sub_idx=0 pixel=[275.0,194.0]
+ <cand> 20 at token 1412 sub_idx=0 pixel=[404.0,400.0]
+ <cand> 0 at token 1928 sub_idx=1 pixel=[141.0,126.0]
+ <cand> 1 at token 1976 sub_idx=1 pixel=[109.0,55.0]
+ <cand> 2 at token 2024 sub_idx=1 pixel=[143.0,246.0]
+ <cand> 3 at token 2072 sub_idx=1 pixel=[224.0,269.0]
+ <cand> 4 at token 2120 sub_idx=1 pixel=[145.0,219.0]
+ <cand> 5 at token 2168 sub_idx=1 pixel=[175.0,188.0]
+ <cand> 6 at token 2216 sub_idx=1 pixel=[45.0,88.0]
+ <cand> 7 at token 2264 sub_idx=1 pixel=[57.0,178.0]
+ <cand> 8 at token 2312 sub_idx=1 pixel=[18.0,16.0]
+ <cand> 9 at token 2360 sub_idx=1 pixel=[97.0,76.0]
+ <cand> 10 at token 2408 sub_idx=1 pixel=[24.0,147.0]
+ <cand> 11 at token 2457 sub_idx=1 pixel=[161.0,107.0]
+ <cand> 12 at token 2506 sub_idx=1 pixel=[233.0,297.0]
+ <cand> 13 at token 2553 sub_idx=1 pixel=[100.0,222.0]
+ <cand> 14 at token 2602 sub_idx=1 pixel=[31.0,226.0]
+ <cand> 15 at token 2651 sub_idx=1 pixel=[224.0,255.0]
+ <cand> 0 at token 3167 sub_idx=2 pixel=[155.0,57.0]
+ <cand> 1 at token 3215 sub_idx=2 pixel=[6.0,64.0]
+ <cand> 2 at token 3263 sub_idx=2 pixel=[172.0,64.0]
+ <cand> 3 at token 3311 sub_idx=2 pixel=[174.0,148.0]
+ <cand> 4 at token 3359 sub_idx=2 pixel=[20.0,108.0]
+ <cand> 5 at token 3407 sub_idx=2 pixel=[86.0,136.0]
+ <cand> 6 at token 3455 sub_idx=2 pixel=[84.0,33.0]
+ <cand> 7 at token 3503 sub_idx=2 pixel=[91.0,69.0]
+ <cand> 8 at token 3551 sub_idx=2 pixel=[219.0,232.0]
+ <cand> 9 at token 3597 sub_idx=2 pixel=[28.0,85.0]
+ <cand> 10 at token 3645 sub_idx=2 pixel=[195.0,236.0]
+ <cand> 11 at token 3694 sub_idx=2 pixel=[29.0,44.0]
+ <cand> 12 at token 3743 sub_idx=2 pixel=[157.0,76.0]
+ <cand> 13 at token 3792 sub_idx=2 pixel=[38.0,129.0]
+ <cand> 14 at token 3841 sub_idx=2 pixel=[86.0,208.0]
+ <cand> 15 at token 3890 sub_idx=2 pixel=[168.0,211.0]
+ <cand> 16 at token 3939 sub_idx=2 pixel=[49.0,206.0]
+ <cand> 17 at token 3988 sub_idx=2 pixel=[149.0,230.0]
+ <cand> 18 at token 4037 sub_idx=2 pixel=[58.0,102.0]
+ <cand> 0 at token 4552 sub_idx=3 pixel=[21.0,189.0]
+ <cand> 1 at token 4600 sub_idx=3 pixel=[220.0,170.0]
+ <cand> 2 at token 4648 sub_idx=3 pixel=[5.0,225.0]
+ <cand> 3 at token 4696 sub_idx=3 pixel=[193.0,118.0]
+ <cand> 4 at token 4744 sub_idx=3 pixel=[180.0,151.0]
+ <cand> 5 at token 4792 sub_idx=3 pixel=[72.0,83.0]
+ <cand> 6 at token 4840 sub_idx=3 pixel=[157.0,221.0]
+ <cand> 7 at token 4888 sub_idx=3 pixel=[159.0,82.0]
+ <cand> 8 at token 4936 sub_idx=3 pixel=[99.0,179.0]
+ <cand> 9 at token 4984 sub_idx=3 pixel=[213.0,234.0]
+ <cand> 0 at token 5500 sub_idx=4 pixel=[278.0,132.0]
+ <cand> 1 at token 5548 sub_idx=4 pixel=[272.0,71.0]
+ <cand> 2 at token 5596 sub_idx=4 pixel=[278.0,266.0]
+ <cand> 3 at token 5644 sub_idx=4 pixel=[222.0,135.0]
+ <cand> 4 at token 5692 sub_idx=4 pixel=[208.0,113.0]
+ <cand> 5 at token 5740 sub_idx=4 pixel=[260.0,237.0]
+ <cand> 6 at token 5788 sub_idx=4 pixel=[175.0,258.0]
+ <cand> 7 at token 5836 sub_idx=4 pixel=[229.0,282.0]
+ <cand> 8 at token 5884 sub_idx=4 pixel=[291.0,200.0]
+ <cand> 9 at token 5932 sub_idx=4 pixel=[235.0,65.0]
+ <cand> 10 at token 5980 sub_idx=4 pixel=[269.0,287.0]
+ <cand> 11 at token 6029 sub_idx=4 pixel=[207.0,86.0]
+ <cand> 12 at token 6078 sub_idx=4 pixel=[183.0,242.0]
+ <cand> 0 at token 6594 sub_idx=5 pixel=[312.0,203.0]
+ <cand> 1 at token 6642 sub_idx=5 pixel=[203.0,237.0]
+ <cand> 2 at token 6690 sub_idx=5 pixel=[230.0,143.0]
+ <cand> 3 at token 6738 sub_idx=5 pixel=[342.0,209.0]
+ <cand> 4 at token 6786 sub_idx=5 pixel=[247.0,178.0]
+ <cand> 5 at token 6834 sub_idx=5 pixel=[216.0,115.0]
+ <cand> 6 at token 6882 sub_idx=5 pixel=[308.0,249.0]
+ <cand> 7 at token 6930 sub_idx=5 pixel=[263.0,258.0]
+ <cand> 8 at token 6978 sub_idx=5 pixel=[218.0,197.0]
+ <cand> 0 at token 7491 sub_idx=6 pixel=[173.0,163.0]
+ <cand> 1 at token 7539 sub_idx=6 pixel=[253.0,129.0]
+ <cand> 2 at token 7587 sub_idx=6 pixel=[192.0,203.0]
+ <cand> 3 at token 7635 sub_idx=6 pixel=[193.0,115.0]
+ <cand> 0 at token 8148 sub_idx=7 pixel=[269.0,204.0]
+ <cand> 1 at token 8196 sub_idx=7 pixel=[161.0,221.0]
+ <cand> 2 at token 8244 sub_idx=7 pixel=[193.0,296.0]
+ <cand> 3 at token 8292 sub_idx=7 pixel=[159.0,251.0]
+ <cand> 4 at token 8340 sub_idx=7 pixel=[139.0,222.0]
+ <cand> 5 at token 8388 sub_idx=7 pixel=[271.0,231.0]
+ <cand> 6 at token 8436 sub_idx=7 pixel=[242.0,235.0]
+ <cand> 0 at token 8952 sub_idx=8 pixel=[234.0,193.0]
+ <cand> 1 at token 9000 sub_idx=8 pixel=[211.0,399.0]
+ <cand> 2 at token 9048 sub_idx=8 pixel=[204.0,379.0]
+ <cand> 3 at token 9096 sub_idx=8 pixel=[217.0,126.0]
+ <cand> 4 at token 9144 sub_idx=8 pixel=[197.0,206.0]
+ <cand> 5 at token 9192 sub_idx=8 pixel=[272.0,344.0]
+ <cand> 6 at token 9240 sub_idx=8 pixel=[241.0,177.0]
+ <cand> 7 at token 9288 sub_idx=8 pixel=[303.0,416.0]
+ <cand> 8 at token 9336 sub_idx=8 pixel=[194.0,316.0]
+ <cand> 9 at token 9384 sub_idx=8 pixel=[269.0,443.0]
+ <cand> 0 at token 9898 sub_idx=9 pixel=[248.0,271.0]
+ <cand> 1 at token 9946 sub_idx=9 pixel=[183.0,251.0]
+ <cand> 2 at token 9994 sub_idx=9 pixel=[243.0,244.0]
+ <cand> 3 at token 10042 sub_idx=9 pixel=[251.0,312.0]
+ <cand> 4 at token 10090 sub_idx=9 pixel=[230.0,214.0]
+ <cand> 5 at token 10136 sub_idx=9 pixel=[107.0,373.0]
+ <cand> 6 at token 10184 sub_idx=9 pixel=[110.0,350.0]
+ <cand> 7 at token 10232 sub_idx=9 pixel=[324.0,326.0]
+ <cand> 8 at token 10280 sub_idx=9 pixel=[286.0,375.0]
+ <cand> 9 at token 10328 sub_idx=9 pixel=[331.0,346.0]
+ <cand> 0 at token 10842 sub_idx=10 pixel=[317.0,192.0]
+ <cand> 1 at token 10890 sub_idx=10 pixel=[110.0,244.0]
+ <cand> 2 at token 10938 sub_idx=10 pixel=[246.0,181.0]
+ <cand> 3 at token 10986 sub_idx=10 pixel=[209.0,249.0]
+ <cand> 4 at token 11034 sub_idx=10 pixel=[230.0,268.0]
+ <cand> 5 at token 11082 sub_idx=10 pixel=[144.0,222.0]
+ <cand> 6 at token 11130 sub_idx=10 pixel=[276.0,212.0]
+ <cand> 7 at token 11178 sub_idx=10 pixel=[138.0,293.0]
+ <cand> 8 at token 11226 sub_idx=10 pixel=[189.0,298.0]
+ <cand> 9 at token 11274 sub_idx=10 pixel=[150.0,237.0]
+ <cand> 10 at token 11322 sub_idx=10 pixel=[88.0,340.0]
+ <cand> 11 at token 11371 sub_idx=10 pixel=[49.0,330.0]
+ <cand> 12 at token 11420 sub_idx=10 pixel=[128.0,273.0]
+ <cand> 13 at token 11469 sub_idx=10 pixel=[244.0,222.0]
+ <cand> 14 at token 11516 sub_idx=10 pixel=[22.0,204.0]
+ <cand> 15 at token 11565 sub_idx=10 pixel=[223.0,311.0]
+ <cand> 16 at token 11614 sub_idx=10 pixel=[295.0,180.0]
+ <cand> 0 at token 12130 sub_idx=11 pixel=[255.0,239.0]
+ <cand> 1 at token 12178 sub_idx=11 pixel=[129.0,321.0]
+ <cand> 2 at token 12226 sub_idx=11 pixel=[299.0,209.0]
+ <cand> 3 at token 12274 sub_idx=11 pixel=[255.0,210.0]
+ <cand> 4 at token 12322 sub_idx=11 pixel=[109.0,327.0]
+ <cand> 5 at token 12370 sub_idx=11 pixel=[239.0,296.0]
+ <cand> 6 at token 12418 sub_idx=11 pixel=[136.0,291.0]
+ <cand> 7 at token 12466 sub_idx=11 pixel=[211.0,319.0]
+ <cand> 8 at token 12514 sub_idx=11 pixel=[171.0,313.0]
+ <cand> 0 at token 13027 sub_idx=12 pixel=[261.0,348.0]
+ <cand> 1 at token 13075 sub_idx=12 pixel=[321.0,357.0]
+ <cand> 2 at token 13123 sub_idx=12 pixel=[171.0,274.0]
+ <cand> 3 at token 13171 sub_idx=12 pixel=[225.0,182.0]
+ <cand> 4 at token 13219 sub_idx=12 pixel=[270.0,115.0]
+ <cand> 5 at token 13267 sub_idx=12 pixel=[328.0,265.0]
+ <cand> 6 at token 13315 sub_idx=12 pixel=[224.0,161.0]
+ <cand> 0 at token 13828 sub_idx=13 pixel=[221.0,187.0]
+ <cand> 1 at token 13876 sub_idx=13 pixel=[189.0,204.0]
+ <cand> 2 at token 13924 sub_idx=13 pixel=[220.0,160.0]
+ <cand> 3 at token 13972 sub_idx=13 pixel=[194.0,236.0]
+ <cand> 4 at token 14020 sub_idx=13 pixel=[150.0,247.0]
+ <cand> 5 at token 14068 sub_idx=13 pixel=[210.0,197.0]
+ <cand> 0 at token 14582 sub_idx=14 pixel=[219.0,191.0]
+ <cand> 1 at token 14630 sub_idx=14 pixel=[150.0,237.0]
+ <cand> 2 at token 14678 sub_idx=14 pixel=[266.0,213.0]
+ <cand> 3 at token 14726 sub_idx=14 pixel=[196.0,129.0]
+ <cand> 4 at token 14774 sub_idx=14 pixel=[213.0,347.0]
+ <cand> 5 at token 14822 sub_idx=14 pixel=[173.0,265.0]
+ <cand> 6 at token 14870 sub_idx=14 pixel=[215.0,274.0]
+ <cand> 7 at token 14918 sub_idx=14 pixel=[165.0,210.0]
+ <cand> 8 at token 14966 sub_idx=14 pixel=[264.0,279.0]
+ <cand> 9 at token 15014 sub_idx=14 pixel=[155.0,191.0]
+ <cand> 10 at token 15062 sub_idx=14 pixel=[373.0,380.0]
+ <cand> 11 at token 15111 sub_idx=14 pixel=[136.0,265.0]
+ <cand> 12 at token 15160 sub_idx=14 pixel=[223.0,259.0]
+ <cand> 13 at token 15209 sub_idx=14 pixel=[292.0,249.0]
+ <cand> 14 at token 15258 sub_idx=14 pixel=[357.0,387.0]
+ <cand> 0 at token 15774 sub_idx=15 pixel=[249.0,86.0]
+ <cand> 1 at token 15822 sub_idx=15 pixel=[173.0,218.0]
+ <cand> 2 at token 15870 sub_idx=15 pixel=[280.0,249.0]
+ <cand> 3 at token 15918 sub_idx=15 pixel=[288.0,211.0]
+ <cand> 4 at token 15966 sub_idx=15 pixel=[228.0,82.0]
+ <cand> 5 at token 16014 sub_idx=15 pixel=[310.0,161.0]
+ <cand> 6 at token 16062 sub_idx=15 pixel=[178.0,234.0]
+ <cand> 7 at token 16110 sub_idx=15 pixel=[232.0,51.0]
+ <cand> 0 at token 16624 sub_idx=16 pixel=[104.0,112.0]
+ <cand> 1 at token 16672 sub_idx=16 pixel=[182.0,47.0]
+ <cand> 2 at token 16720 sub_idx=16 pixel=[189.0,245.0]
+ <cand> 3 at token 16768 sub_idx=16 pixel=[245.0,57.0]
+ <cand> 4 at token 16816 sub_idx=16 pixel=[325.0,61.0]
+ <cand> 5 at token 16864 sub_idx=16 pixel=[108.0,230.0]
+ <cand> 6 at token 16912 sub_idx=16 pixel=[98.0,101.0]
+ <cand> 7 at token 16960 sub_idx=16 pixel=[365.0,224.0]
+ <cand> 8 at token 17008 sub_idx=16 pixel=[223.0,175.0]
+ <cand> 9 at token 17056 sub_idx=16 pixel=[260.0,311.0]
+ <cand> 10 at token 17104 sub_idx=16 pixel=[216.0,251.0]
+ <cand> 11 at token 17151 sub_idx=16 pixel=[284.0,246.0]
+ <cand> 12 at token 17200 sub_idx=16 pixel=[206.0,195.0]
+ <cand> 13 at token 17249 sub_idx=16 pixel=[339.0,258.0]
+ <cand> 14 at token 17298 sub_idx=16 pixel=[302.0,249.0]
+ <cand> 15 at token 17347 sub_idx=16 pixel=[316.0,177.0]
+ <cand> 16 at token 17396 sub_idx=16 pixel=[271.0,154.0]
+ <cand> 0 at token 17912 sub_idx=17 pixel=[281.0,280.0]
+ <cand> 1 at token 17960 sub_idx=17 pixel=[250.0,245.0]
+ <cand> 2 at token 18008 sub_idx=17 pixel=[197.0,321.0]
+ <cand> 3 at token 18056 sub_idx=17 pixel=[188.0,271.0]
+ <cand> 4 at token 18104 sub_idx=17 pixel=[383.0,250.0]
+ <cand> 5 at token 18152 sub_idx=17 pixel=[363.0,189.0]
+ <cand> 6 at token 18200 sub_idx=17 pixel=[206.0,180.0]
+ <cand> 7 at token 18248 sub_idx=17 pixel=[305.0,327.0]
+ <cand> 8 at token 18296 sub_idx=17 pixel=[244.0,157.0]
+ <cand> 9 at token 18344 sub_idx=17 pixel=[206.0,128.0]
+ <cand> 10 at token 18392 sub_idx=17 pixel=[365.0,172.0]
+ <cand> 11 at token 18441 sub_idx=17 pixel=[174.0,295.0]
+ <cand> 12 at token 18490 sub_idx=17 pixel=[355.0,309.0]
+ <cand> 13 at token 18539 sub_idx=17 pixel=[240.0,235.0]
+ <cand> 14 at token 18586 sub_idx=17 pixel=[153.0,326.0]
+ <cand> 15 at token 18635 sub_idx=17 pixel=[238.0,325.0]
+ <cand> 16 at token 18684 sub_idx=17 pixel=[364.0,327.0]
+ <cand> 17 at token 18733 sub_idx=17 pixel=[179.0,248.0]
+ <cand> 0 at token 19250 sub_idx=18 pixel=[196.0,286.0]
+ <cand> 1 at token 19298 sub_idx=18 pixel=[186.0,308.0]
+ <cand> 2 at token 19346 sub_idx=18 pixel=[354.0,318.0]
+ <cand> 3 at token 19394 sub_idx=18 pixel=[196.0,186.0]
+ <cand> 4 at token 19442 sub_idx=18 pixel=[212.0,193.0]
+ <cand> 5 at token 19490 sub_idx=18 pixel=[205.0,259.0]
+ <cand> 6 at token 19538 sub_idx=18 pixel=[321.0,249.0]
+ <cand> 7 at token 19586 sub_idx=18 pixel=[236.0,238.0]
+ <cand> 8 at token 19634 sub_idx=18 pixel=[323.0,267.0]
+ <cand> 9 at token 19682 sub_idx=18 pixel=[152.0,188.0]
+ <cand> 10 at token 19730 sub_idx=18 pixel=[305.0,190.0]
+ <cand> 11 at token 19779 sub_idx=18 pixel=[237.0,268.0]
+ <cand> 12 at token 19826 sub_idx=18 pixel=[143.0,203.0]
+ <cand> 13 at token 19875 sub_idx=18 pixel=[226.0,280.0]
+ <cand> 14 at token 19924 sub_idx=18 pixel=[237.0,320.0]
+ <cand> 15 at token 19973 sub_idx=18 pixel=[188.0,183.0]
+ <cand> 0 at token 20489 sub_idx=19 pixel=[197.0,269.0]
+ <cand> 1 at token 20537 sub_idx=19 pixel=[227.0,291.0]
+ <cand> 2 at token 20585 sub_idx=19 pixel=[209.0,204.0]
+ <cand> 3 at token 20633 sub_idx=19 pixel=[230.0,264.0]
+ <cand> 4 at token 20681 sub_idx=19 pixel=[247.0,174.0]
+ <cand> 5 at token 20729 sub_idx=19 pixel=[246.0,194.0]
+ <cand> 6 at token 20777 sub_idx=19 pixel=[264.0,226.0]
+ <cand> 7 at token 20825 sub_idx=19 pixel=[260.0,217.0]
+ <cand> 0 at token 21338 sub_idx=20 pixel=[237.0,243.0]
+ <cand> 1 at token 21384 sub_idx=20 pixel=[277.0,77.0]
+ <cand> 2 at token 21432 sub_idx=20 pixel=[310.0,237.0]
+ <cand> 3 at token 21480 sub_idx=20 pixel=[328.0,289.0]
+ <cand> 4 at token 21528 sub_idx=20 pixel=[245.0,101.0]
+ <cand> 5 at token 21576 sub_idx=20 pixel=[241.0,159.0]
+ <cand> 6 at token 21624 sub_idx=20 pixel=[183.0,200.0]
+ <cand> 7 at token 21672 sub_idx=20 pixel=[229.0,270.0]
+ <cand> 0 at token 22187 sub_idx=21 pixel=[134.0,124.0]
+ <cand> 1 at token 22235 sub_idx=21 pixel=[195.0,137.0]
+ <cand> 2 at token 22283 sub_idx=21 pixel=[65.0,112.0]
+ <cand> 3 at token 22331 sub_idx=21 pixel=[255.0,264.0]
+ <cand> 4 at token 22379 sub_idx=21 pixel=[207.0,281.0]
+ <cand> 5 at token 22427 sub_idx=21 pixel=[97.0,122.0]
+ <cand> 6 at token 22475 sub_idx=21 pixel=[241.0,182.0]
+ <cand> 7 at token 22523 sub_idx=21 pixel=[226.0,183.0]
+ <cand> 0 at token 23037 sub_idx=22 pixel=[242.0,288.0]
+ <cand> 1 at token 23085 sub_idx=22 pixel=[113.0,419.0]
+ <cand> 2 at token 23133 sub_idx=22 pixel=[218.0,220.0]
+ <cand> 3 at token 23179 sub_idx=22 pixel=[142.0,145.0]
+ <cand> 4 at token 23227 sub_idx=22 pixel=[218.0,368.0]
+ <cand> 5 at token 23275 sub_idx=22 pixel=[208.0,266.0]
+ <cand> 6 at token 23323 sub_idx=22 pixel=[198.0,377.0]
+ <cand> 7 at token 23371 sub_idx=22 pixel=[122.0,372.0]
+ <cand> 8 at token 23419 sub_idx=22 pixel=[226.0,278.0]
+ <cand> 9 at token 23467 sub_idx=22 pixel=[239.0,278.0]
+ <cand> 0 at token 23980 sub_idx=23 pixel=[197.0,39.0]
+ <cand> 1 at token 24028 sub_idx=23 pixel=[290.0,274.0]
+ <cand> 2 at token 24076 sub_idx=23 pixel=[228.0,337.0]
+ <cand> 3 at token 24124 sub_idx=23 pixel=[259.0,296.0]
+ <cand> 4 at token 24172 sub_idx=23 pixel=[267.0,247.0]
+ <cand> 5 at token 24220 sub_idx=23 pixel=[185.0,32.0]
+ <cand> 6 at token 24268 sub_idx=23 pixel=[318.0,213.0]
+ <cand> 7 at token 24316 sub_idx=23 pixel=[316.0,38.0]
+ <cand> 8 at token 24364 sub_idx=23 pixel=[267.0,316.0]
+ <cand> 9 at token 24412 sub_idx=23 pixel=[209.0,218.0]
+ <e_cand> 0 at token 490 sub_idx=0 (same embed)
+ <e_cand> 1 at token 538 sub_idx=0 (same embed)
+ <e_cand> 2 at token 586 sub_idx=0 (same embed)
+ <e_cand> 0 at token 1974 sub_idx=1 (same embed)
+ <e_cand> 1 at token 2022 sub_idx=1 (same embed)
+ <e_cand> 2 at token 2070 sub_idx=1 (same embed)
+ <e_cand> 0 at token 3213 sub_idx=2 (same embed)
+ <e_cand> 1 at token 3261 sub_idx=2 (same embed)
+ <e_cand> 2 at token 3309 sub_idx=2 (same embed)
+ <e_cand> 0 at token 4598 sub_idx=3 (same embed)
+ <e_cand> 1 at token 4646 sub_idx=3 (same embed)
+ <e_cand> 2 at token 4694 sub_idx=3 (same embed)
+ <e_cand> 0 at token 5546 sub_idx=4 (same embed)
+ <e_cand> 1 at token 5594 sub_idx=4 (same embed)
+ <e_cand> 2 at token 5642 sub_idx=4 (same embed)
+ <e_cand> 0 at token 6640 sub_idx=5 (same embed)
+ <e_cand> 1 at token 6688 sub_idx=5 (same embed)
+ <e_cand> 2 at token 6736 sub_idx=5 (same embed)
+ <e_cand> 0 at token 7537 sub_idx=6 (same embed)
+ <e_cand> 1 at token 7585 sub_idx=6 (same embed)
+ <e_cand> 2 at token 7633 sub_idx=6 (same embed)
+ <e_cand> 0 at token 8194 sub_idx=7 (same embed)
+ <e_cand> 1 at token 8242 sub_idx=7 (same embed)
+ <e_cand> 2 at token 8290 sub_idx=7 (same embed)
+ <e_cand> 0 at token 8998 sub_idx=8 (same embed)
+ <e_cand> 1 at token 9046 sub_idx=8 (same embed)
+ <e_cand> 2 at token 9094 sub_idx=8 (same embed)
+ <e_cand> 0 at token 9944 sub_idx=9 (same embed)
+ <e_cand> 1 at token 9992 sub_idx=9 (same embed)
+ <e_cand> 2 at token 10040 sub_idx=9 (same embed)
+ <e_cand> 0 at token 10888 sub_idx=10 (same embed)
+ <e_cand> 1 at token 10936 sub_idx=10 (same embed)
+ <e_cand> 2 at token 10984 sub_idx=10 (same embed)
+ <e_cand> 0 at token 12176 sub_idx=11 (same embed)
+ <e_cand> 1 at token 12224 sub_idx=11 (same embed)
+ <e_cand> 2 at token 12272 sub_idx=11 (same embed)
+ <e_cand> 0 at token 13073 sub_idx=12 (same embed)
+ <e_cand> 1 at token 13121 sub_idx=12 (same embed)
+ <e_cand> 2 at token 13169 sub_idx=12 (same embed)
+ <e_cand> 0 at token 13874 sub_idx=13 (same embed)
+ <e_cand> 1 at token 13922 sub_idx=13 (same embed)
+ <e_cand> 2 at token 13970 sub_idx=13 (same embed)
+ <e_cand> 0 at token 14628 sub_idx=14 (same embed)
+ <e_cand> 1 at token 14676 sub_idx=14 (same embed)
+ <e_cand> 2 at token 14724 sub_idx=14 (same embed)
+ <e_cand> 0 at token 15820 sub_idx=15 (same embed)
+ <e_cand> 1 at token 15868 sub_idx=15 (same embed)
+ <e_cand> 2 at token 15916 sub_idx=15 (same embed)
+ <e_cand> 0 at token 16670 sub_idx=16 (same embed)
+ <e_cand> 1 at token 16718 sub_idx=16 (same embed)
+ <e_cand> 2 at token 16766 sub_idx=16 (same embed)
+ <e_cand> 0 at token 17958 sub_idx=17 (same embed)
+ <e_cand> 1 at token 18006 sub_idx=17 (same embed)
+ <e_cand> 2 at token 18054 sub_idx=17 (same embed)
+ <e_cand> 0 at token 19296 sub_idx=18 (same embed)
+ <e_cand> 1 at token 19344 sub_idx=18 (same embed)
+ <e_cand> 2 at token 19392 sub_idx=18 (same embed)
+ <e_cand> 0 at token 20535 sub_idx=19 (same embed)
+ <e_cand> 1 at token 20583 sub_idx=19 (same embed)
+ <e_cand> 2 at token 20631 sub_idx=19 (same embed)
+ <e_cand> 0 at token 21382 sub_idx=20 (same embed)
+ <e_cand> 1 at token 21430 sub_idx=20 (same embed)
+ <e_cand> 2 at token 21478 sub_idx=20 (same embed)
+ <e_cand> 0 at token 22233 sub_idx=21 (same embed)
+ <e_cand> 1 at token 22281 sub_idx=21 (same embed)
+ <e_cand> 2 at token 22329 sub_idx=21 (same embed)
+ <e_cand> 0 at token 23083 sub_idx=22 (same embed)
+ <e_cand> 1 at token 23131 sub_idx=22 (same embed)
+ <e_cand> 2 at token 23177 sub_idx=22 (same embed)
+ <e_cand> 0 at token 24026 sub_idx=23 (same embed)
+ <e_cand> 1 at token 24074 sub_idx=23 (same embed)
+ <e_cand> 2 at token 24122 sub_idx=23 (same embed)
+
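The listing above is the model's own check that `dual_text_pos_injection` (enabled in the config earlier in this commit) landed where it should: each `<s>`/`<e_s>` pair brackets one observation step (`sub_idx`), each `<cand>` frontier token is tied to a BEV `pixel=[x,y]` coordinate, and each `<e_cand>` reuses the embedding of its opening `<cand>` ("same embed"). Purely as an illustration of the scan shape, and not the repository's actual code, a toy that emits the same kind of lines could look like this; the special-token ids and coordinates are invented:

```python
# Toy reproduction of the verification listing's shape. The special-token
# ids and pixel coordinates below are invented; the real model resolves
# them from its tokenizer and BEV frontier map.
import torch

CAND_ID, E_CAND_ID = 92550, 92551  # hypothetical ids for <cand> / <e_cand>

input_ids = torch.tensor([11, CAND_ID, 3, 9, E_CAND_ID, 7, CAND_ID, 1, E_CAND_ID])
pixels = [(320.0, 270.0), (185.0, 213.0)]  # one (x, y) per <cand>, log-style

# Find where each special token sits in the flat token sequence.
cand_pos = (input_ids == CAND_ID).nonzero(as_tuple=True)[0].tolist()
e_cand_pos = (input_ids == E_CAND_ID).nonzero(as_tuple=True)[0].tolist()

for i, (tok, (x, y)) in enumerate(zip(cand_pos, pixels)):
    print(f"<cand> {i} at token {tok} sub_idx=0 pixel=[{x},{y}]")
for i, tok in enumerate(e_cand_pos):
    print(f"<e_cand> {i} at token {tok} sub_idx=0 (same embed)")
```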
+ [2026-02-25 06:17:41,876] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 4.88 | optimizer_gradients: 32.07 | optimizer_step: 4.90
+ [2026-02-25 06:17:41,878] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 13975.93 | bwd_microstep: 7137.28 | bwd_inner_microstep: 6532.16 | bwd_allreduce_microstep: 605.00 | step_microstep: 143.00
+ [2026-02-25 06:17:41,879] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 13975.93 | bwd: 7137.29 | bwd_inner: 6532.15 | bwd_allreduce: 605.03 | step: 143.16
+ {'loss': 1.6229, 'learning_rate': 3.0303030303030305e-07, 'epoch': 0.0}
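The `learning_rate` in this first record is consistent with the config dumped above: with `warmup_steps: 0`, the HF Trainer falls back to `warmup_ratio * max_steps = 0.03 * 11000 = 330` warmup steps, and linear warmup then gives roughly `1e-4 * step / 330`, i.e. about 3.03e-07 at step 1 and 6.06e-07 at step 2 (the step-2 record appears after the next sample dump). A quick check, using only values from `config.yaml`:

```python
# Consistency check of the logged learning rates against the config values.
base_lr, warmup_ratio, max_steps = 1e-4, 0.03, 11000
warmup_steps = int(warmup_ratio * max_steps)  # 330

for step in (1, 2):
    print(base_lr * step / warmup_steps)
# ~3.0303e-07 (step 1, matches the record above)
# ~6.0606e-07 (step 2, matches the next loss record)
```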
+ 02/25/2026 06:17:49 - INFO - internvl_cleaned.model.internvl_chat.modeling_internvl_chat - [Text Position Embedding Verification] Sample 2:
+ <s> at token 437 sub_idx=0
+ <s> at token 1336 sub_idx=1
+ <s> at token 2476 sub_idx=2
+ <s> at token 3469 sub_idx=3
+ <s> at token 4463 sub_idx=4
+ <s> at token 5656 sub_idx=5
+ <s> at token 6506 sub_idx=6
+ <s> at token 7695 sub_idx=7
+ <s> at token 8593 sub_idx=8
+ <s> at token 9635 sub_idx=9
+ <s> at token 10245 sub_idx=10
+ <s> at token 10950 sub_idx=11
+ <s> at token 12334 sub_idx=12
+ <s> at token 13376 sub_idx=13
+ <s> at token 14468 sub_idx=14
+ <s> at token 15414 sub_idx=15
+ <s> at token 16023 sub_idx=16
+ <s> at token 17508 sub_idx=17
+ <s> at token 18168 sub_idx=18
+ <s> at token 19309 sub_idx=19
+ <s> at token 20254 sub_idx=20
+ <s> at token 21055 sub_idx=21
+ <s> at token 22001 sub_idx=22
+ <s> at token 23336 sub_idx=23
+ <s> at token 23993 sub_idx=24
+ <e_s> at token 439 sub_idx=0
+ <e_s> at token 1338 sub_idx=1
+ <e_s> at token 2478 sub_idx=2
+ <e_s> at token 3471 sub_idx=3
+ <e_s> at token 4465 sub_idx=4
+ <e_s> at token 5658 sub_idx=5
+ <e_s> at token 6508 sub_idx=6
+ <e_s> at token 7697 sub_idx=7
+ <e_s> at token 8595 sub_idx=8
+ <e_s> at token 9637 sub_idx=9
+ <e_s> at token 10247 sub_idx=10
+ <e_s> at token 10952 sub_idx=11
+ <e_s> at token 12336 sub_idx=12
+ <e_s> at token 13378 sub_idx=13
+ <e_s> at token 14470 sub_idx=14
+ <e_s> at token 15416 sub_idx=15
+ <e_s> at token 16025 sub_idx=16
+ <e_s> at token 17510 sub_idx=17
+ <e_s> at token 18170 sub_idx=18
+ <e_s> at token 19311 sub_idx=19
+ <e_s> at token 20256 sub_idx=20
+ <e_s> at token 21057 sub_idx=21
+ <e_s> at token 22003 sub_idx=22
+ <e_s> at token 23338 sub_idx=23
+ <e_s> at token 23995 sub_idx=24
+ <cand> 0 at token 444 sub_idx=0 pixel=[234.0,176.0]
+ <cand> 1 at token 492 sub_idx=0 pixel=[340.0,233.0]
+ <cand> 2 at token 540 sub_idx=0 pixel=[352.0,297.0]
+ <cand> 3 at token 588 sub_idx=0 pixel=[369.0,275.0]
+ <cand> 4 at token 636 sub_idx=0 pixel=[174.0,246.0]
+ <cand> 5 at token 684 sub_idx=0 pixel=[191.0,156.0]
+ <cand> 6 at token 732 sub_idx=0 pixel=[235.0,127.0]
+ <cand> 7 at token 780 sub_idx=0 pixel=[120.0,228.0]
+ <cand> 8 at token 828 sub_idx=0 pixel=[153.0,252.0]
+ <cand> 0 at token 1343 sub_idx=1 pixel=[175.0,236.0]
+ <cand> 1 at token 1389 sub_idx=1 pixel=[140.0,179.0]
+ <cand> 2 at token 1437 sub_idx=1 pixel=[199.0,212.0]
+ <cand> 3 at token 1485 sub_idx=1 pixel=[92.0,241.0]
+ <cand> 4 at token 1533 sub_idx=1 pixel=[207.0,289.0]
+ <cand> 5 at token 1581 sub_idx=1 pixel=[51.0,44.0]
+ <cand> 6 at token 1629 sub_idx=1 pixel=[137.0,286.0]
+ <cand> 7 at token 1677 sub_idx=1 pixel=[155.0,302.0]
+ <cand> 8 at token 1725 sub_idx=1 pixel=[50.0,123.0]
+ <cand> 9 at token 1773 sub_idx=1 pixel=[184.0,187.0]
+ <cand> 10 at token 1821 sub_idx=1 pixel=[159.0,260.0]
+ <cand> 11 at token 1870 sub_idx=1 pixel=[85.0,105.0]
+ <cand> 12 at token 1919 sub_idx=1 pixel=[131.0,122.0]
+ <cand> 13 at token 1968 sub_idx=1 pixel=[111.0,246.0]
+ <cand> 0 at token 2483 sub_idx=2 pixel=[259.0,253.0]
+ <cand> 1 at token 2529 sub_idx=2 pixel=[73.0,162.0]
+ <cand> 2 at token 2577 sub_idx=2 pixel=[287.0,247.0]
+ <cand> 3 at token 2625 sub_idx=2 pixel=[7.0,159.0]
+ <cand> 4 at token 2673 sub_idx=2 pixel=[223.0,267.0]
+ <cand> 5 at token 2721 sub_idx=2 pixel=[274.0,257.0]
+ <cand> 6 at token 2769 sub_idx=2 pixel=[252.0,223.0]
+ <cand> 7 at token 2817 sub_idx=2 pixel=[116.0,118.0]
+ <cand> 8 at token 2865 sub_idx=2 pixel=[13.0,136.0]
+ <cand> 9 at token 2913 sub_idx=2 pixel=[191.0,153.0]
+ <cand> 10 at token 2961 sub_idx=2 pixel=[119.0,147.0]
+ <cand> 0 at token 3476 sub_idx=3 pixel=[170.0,237.0]
+ <cand> 1 at token 3524 sub_idx=3 pixel=[128.0,180.0]
+ <cand> 2 at token 3572 sub_idx=3 pixel=[319.0,253.0]
+ <cand> 3 at token 3620 sub_idx=3 pixel=[305.0,267.0]
+ <cand> 4 at token 3668 sub_idx=3 pixel=[235.0,238.0]
+ <cand> 5 at token 3716 sub_idx=3 pixel=[160.0,162.0]
+ <cand> 6 at token 3764 sub_idx=3 pixel=[122.0,222.0]
+ <cand> 7 at token 3812 sub_idx=3 pixel=[210.0,199.0]
+ <cand> 8 at token 3860 sub_idx=3 pixel=[128.0,234.0]
+ <cand> 9 at token 3908 sub_idx=3 pixel=[311.0,165.0]
+ <cand> 10 at token 3956 sub_idx=3 pixel=[298.0,105.0]
+ <cand> 0 at token 4470 sub_idx=4 pixel=[240.0,286.0]
+ <cand> 1 at token 4518 sub_idx=4 pixel=[79.0,332.0]
+ <cand> 2 at token 4566 sub_idx=4 pixel=[253.0,233.0]
+ <cand> 3 at token 4614 sub_idx=4 pixel=[304.0,152.0]
+ <cand> 4 at token 4662 sub_idx=4 pixel=[253.0,181.0]
+ <cand> 5 at token 4710 sub_idx=4 pixel=[125.0,204.0]
+ <cand> 6 at token 4758 sub_idx=4 pixel=[201.0,319.0]
+ <cand> 7 at token 4806 sub_idx=4 pixel=[136.0,182.0]
+ <cand> 8 at token 4854 sub_idx=4 pixel=[50.0,331.0]
+ <cand> 9 at token 4902 sub_idx=4 pixel=[252.0,159.0]
+ <cand> 10 at token 4950 sub_idx=4 pixel=[126.0,240.0]
+ <cand> 11 at token 4999 sub_idx=4 pixel=[302.0,279.0]
+ <cand> 12 at token 5048 sub_idx=4 pixel=[165.0,304.0]
+ <cand> 13 at token 5097 sub_idx=4 pixel=[225.0,173.0]
+ <cand> 14 at token 5146 sub_idx=4 pixel=[282.0,164.0]
+ <cand> 0 at token 5663 sub_idx=5 pixel=[335.0,446.0]
+ <cand> 1 at token 5711 sub_idx=5 pixel=[200.0,223.0]
+ <cand> 2 at token 5759 sub_idx=5 pixel=[200.0,343.0]
+ <cand> 3 at token 5807 sub_idx=5 pixel=[251.0,326.0]
+ <cand> 4 at token 5855 sub_idx=5 pixel=[293.0,424.0]
+ <cand> 5 at token 5903 sub_idx=5 pixel=[250.0,217.0]
+ <cand> 6 at token 5951 sub_idx=5 pixel=[270.0,198.0]
+ <cand> 7 at token 5999 sub_idx=5 pixel=[310.0,410.0]
+ <cand> 0 at token 6513 sub_idx=6 pixel=[337.0,257.0]
+ <cand> 1 at token 6561 sub_idx=6 pixel=[160.0,200.0]
+ <cand> 2 at token 6609 sub_idx=6 pixel=[219.0,280.0]
+ <cand> 3 at token 6657 sub_idx=6 pixel=[155.0,245.0]
+ <cand> 4 at token 6705 sub_idx=6 pixel=[187.0,131.0]
+ <cand> 5 at token 6753 sub_idx=6 pixel=[192.0,200.0]
+ <cand> 6 at token 6799 sub_idx=6 pixel=[152.0,263.0]
+ <cand> 7 at token 6847 sub_idx=6 pixel=[355.0,303.0]
+ <cand> 8 at token 6895 sub_idx=6 pixel=[226.0,199.0]
+ <cand> 9 at token 6943 sub_idx=6 pixel=[356.0,288.0]
+ <cand> 10 at token 6991 sub_idx=6 pixel=[260.0,234.0]
+ <cand> 11 at token 7040 sub_idx=6 pixel=[203.0,143.0]
+ <cand> 12 at token 7089 sub_idx=6 pixel=[156.0,230.0]
+ <cand> 13 at token 7138 sub_idx=6 pixel=[286.0,277.0]
+ <cand> 14 at token 7187 sub_idx=6 pixel=[178.0,270.0]
+ <cand> 0 at token 7702 sub_idx=7 pixel=[249.0,247.0]
+ <cand> 1 at token 7748 sub_idx=7 pixel=[119.0,208.0]
+ <cand> 2 at token 7796 sub_idx=7 pixel=[59.0,192.0]
+ <cand> 3 at token 7844 sub_idx=7 pixel=[264.0,250.0]
+ <cand> 4 at token 7892 sub_idx=7 pixel=[263.0,200.0]
+ <cand> 5 at token 7940 sub_idx=7 pixel=[171.0,134.0]
+ <cand> 6 at token 7988 sub_idx=7 pixel=[215.0,260.0]
+ <cand> 7 at token 8036 sub_idx=7 pixel=[280.0,241.0]
+ <cand> 8 at token 8084 sub_idx=7 pixel=[180.0,110.0]
+ <cand> 0 at token 8600 sub_idx=8 pixel=[7.0,152.0]
+ <cand> 1 at token 8648 sub_idx=8 pixel=[114.0,86.0]
+ <cand> 2 at token 8696 sub_idx=8 pixel=[71.0,289.0]
+ <cand> 3 at token 8744 sub_idx=8 pixel=[5.0,218.0]
+ <cand> 4 at token 8792 sub_idx=8 pixel=[153.0,313.0]
+ <cand> 5 at token 8840 sub_idx=8 pixel=[203.0,321.0]
+ <cand> 6 at token 8888 sub_idx=8 pixel=[39.0,235.0]
+ <cand> 7 at token 8936 sub_idx=8 pixel=[232.0,227.0]
+ <cand> 8 at token 8982 sub_idx=8 pixel=[35.0,127.0]
+ <cand> 9 at token 9030 sub_idx=8 pixel=[95.0,119.0]
+ <cand> 10 at token 9078 sub_idx=8 pixel=[150.0,172.0]
+ <cand> 11 at token 9127 sub_idx=8 pixel=[226.0,243.0]
+ <cand> 0 at token 9642 sub_idx=9 pixel=[244.0,329.0]
+ <cand> 1 at token 9690 sub_idx=9 pixel=[226.0,285.0]
+ <cand> 2 at token 9738 sub_idx=9 pixel=[172.0,212.0]
+ <cand> 0 at token 10252 sub_idx=10 pixel=[250.0,304.0]
+ <cand> 1 at token 10300 sub_idx=10 pixel=[251.0,104.0]
+ <cand> 2 at token 10348 sub_idx=10 pixel=[213.0,270.0]
+ <cand> 3 at token 10396 sub_idx=10 pixel=[233.0,324.0]
+ <cand> 4 at token 10444 sub_idx=10 pixel=[217.0,3.0]
+ <cand> 0 at token 10957 sub_idx=11 pixel=[37.0,296.0]
+ <cand> 1 at token 11005 sub_idx=11 pixel=[30.0,314.0]
+ <cand> 2 at token 11053 sub_idx=11 pixel=[127.0,317.0]
+ <cand> 3 at token 11101 sub_idx=11 pixel=[22.0,286.0]
+ <cand> 4 at token 11149 sub_idx=11 pixel=[197.0,227.0]
+ <cand> 5 at token 11197 sub_idx=11 pixel=[314.0,262.0]
+ <cand> 6 at token 11245 sub_idx=11 pixel=[260.0,282.0]
+ <cand> 7 at token 11293 sub_idx=11 pixel=[314.0,171.0]
+ <cand> 8 at token 11339 sub_idx=11 pixel=[296.0,187.0]
+ <cand> 9 at token 11387 sub_idx=11 pixel=[234.0,269.0]
+ <cand> 10 at token 11435 sub_idx=11 pixel=[212.0,161.0]
+ <cand> 11 at token 11484 sub_idx=11 pixel=[322.0,153.0]
+ <cand> 12 at token 11533 sub_idx=11 pixel=[57.0,299.0]
+ <cand> 13 at token 11582 sub_idx=11 pixel=[301.0,151.0]
+ <cand> 14 at token 11631 sub_idx=11 pixel=[67.0,281.0]
+ <cand> 15 at token 11680 sub_idx=11 pixel=[60.0,284.0]
+ <cand> 16 at token 11729 sub_idx=11 pixel=[177.0,199.0]
+ <cand> 17 at token 11778 sub_idx=11 pixel=[50.0,324.0]
+ <cand> 18 at token 11827 sub_idx=11 pixel=[156.0,309.0]
+ <cand> 0 at token 12341 sub_idx=12 pixel=[270.0,120.0]
+ <cand> 1 at token 12389 sub_idx=12 pixel=[218.0,355.0]
+ <cand> 2 at token 12437 sub_idx=12 pixel=[301.0,310.0]
+ <cand> 3 at token 12485 sub_idx=12 pixel=[222.0,276.0]
+ <cand> 4 at token 12533 sub_idx=12 pixel=[148.0,143.0]
+ <cand> 5 at token 12581 sub_idx=12 pixel=[293.0,269.0]
+ <cand> 6 at token 12629 sub_idx=12 pixel=[199.0,357.0]
+ <cand> 7 at token 12677 sub_idx=12 pixel=[252.0,92.0]
+ <cand> 8 at token 12723 sub_idx=12 pixel=[256.0,172.0]
+ <cand> 9 at token 12771 sub_idx=12 pixel=[191.0,152.0]
+ <cand> 10 at token 12819 sub_idx=12 pixel=[166.0,123.0]
+ <cand> 11 at token 12868 sub_idx=12 pixel=[258.0,276.0]
+ <cand> 0 at token 13383 sub_idx=13 pixel=[116.0,124.0]
+ <cand> 1 at token 13431 sub_idx=13 pixel=[160.0,139.0]
+ <cand> 2 at token 13479 sub_idx=13 pixel=[238.0,303.0]
+ <cand> 3 at token 13527 sub_idx=13 pixel=[163.0,257.0]
+ <cand> 4 at token 13575 sub_idx=13 pixel=[183.0,213.0]
+ <cand> 5 at token 13623 sub_idx=13 pixel=[137.0,148.0]
+ <cand> 6 at token 13671 sub_idx=13 pixel=[128.0,114.0]
+ <cand> 7 at token 13719 sub_idx=13 pixel=[161.0,98.0]
+ <cand> 8 at token 13767 sub_idx=13 pixel=[268.0,205.0]
+ <cand> 9 at token 13815 sub_idx=13 pixel=[131.0,107.0]
+ <cand> 10 at token 13863 sub_idx=13 pixel=[194.0,277.0]
+ <cand> 11 at token 13912 sub_idx=13 pixel=[236.0,335.0]
+ <cand> 12 at token 13961 sub_idx=13 pixel=[176.0,281.0]
+ <cand> 0 at token 14475 sub_idx=14 pixel=[341.0,209.0]
+ <cand> 1 at token 14523 sub_idx=14 pixel=[208.0,231.0]
+ <cand> 2 at token 14571 sub_idx=14 pixel=[211.0,191.0]
+ <cand> 3 at token 14619 sub_idx=14 pixel=[231.0,187.0]
+ <cand> 4 at token 14667 sub_idx=14 pixel=[295.0,235.0]
+ <cand> 5 at token 14715 sub_idx=14 pixel=[274.0,113.0]
+ <cand> 6 at token 14763 sub_idx=14 pixel=[278.0,137.0]
+ <cand> 7 at token 14811 sub_idx=14 pixel=[330.0,162.0]
+ <cand> 8 at token 14859 sub_idx=14 pixel=[307.0,144.0]
+ <cand> 9 at token 14907 sub_idx=14 pixel=[318.0,181.0]
+ <cand> 0 at token 15421 sub_idx=15 pixel=[269.0,223.0]
+ <cand> 1 at token 15469 sub_idx=15 pixel=[204.0,178.0]
+ <cand> 2 at token 15517 sub_idx=15 pixel=[213.0,197.0]
+ <cand> 0 at token 16030 sub_idx=16 pixel=[336.0,162.0]
+ <cand> 1 at token 16078 sub_idx=16 pixel=[264.0,117.0]
+ <cand> 2 at token 16126 sub_idx=16 pixel=[355.0,214.0]
+ <cand> 3 at token 16174 sub_idx=16 pixel=[327.0,150.0]
+ <cand> 4 at token 16222 sub_idx=16 pixel=[156.0,161.0]
+ <cand> 5 at token 16270 sub_idx=16 pixel=[420.0,167.0]
+ <cand> 6 at token 16318 sub_idx=16 pixel=[147.0,187.0]
+ <cand> 7 at token 16366 sub_idx=16 pixel=[148.0,203.0]
+ <cand> 8 at token 16414 sub_idx=16 pixel=[239.0,145.0]
+ <cand> 9 at token 16462 sub_idx=16 pixel=[424.0,92.0]
+ <cand> 10 at token 16510 sub_idx=16 pixel=[214.0,290.0]
+ <cand> 11 at token 16559 sub_idx=16 pixel=[196.0,316.0]
+ <cand> 12 at token 16608 sub_idx=16 pixel=[195.0,261.0]
+ <cand> 13 at token 16657 sub_idx=16 pixel=[330.0,213.0]
+ <cand> 14 at token 16706 sub_idx=16 pixel=[247.0,230.0]
+ <cand> 15 at token 16755 sub_idx=16 pixel=[178.0,293.0]
+ <cand> 16 at token 16804 sub_idx=16 pixel=[156.0,168.0]
+ <cand> 17 at token 16853 sub_idx=16 pixel=[286.0,121.0]
+ <cand> 18 at token 16902 sub_idx=16 pixel=[251.0,317.0]
+ <cand> 19 at token 16951 sub_idx=16 pixel=[167.0,139.0]
+ <cand> 20 at token 17000 sub_idx=16 pixel=[409.0,99.0]
+ <cand> 0 at token 17515 sub_idx=17 pixel=[228.0,247.0]
+ <cand> 1 at token 17563 sub_idx=17 pixel=[288.0,254.0]
+ <cand> 2 at token 17611 sub_idx=17 pixel=[220.0,206.0]
+ <cand> 3 at token 17659 sub_idx=17 pixel=[221.0,334.0]
+ <cand> 0 at token 18175 sub_idx=18 pixel=[224.0,119.0]
+ <cand> 1 at token 18223 sub_idx=18 pixel=[406.0,250.0]
+ <cand> 2 at token 18271 sub_idx=18 pixel=[358.0,253.0]
+ <cand> 3 at token 18319 sub_idx=18 pixel=[348.0,228.0]
+ <cand> 4 at token 18367 sub_idx=18 pixel=[244.0,150.0]
+ <cand> 5 at token 18415 sub_idx=18 pixel=[195.0,207.0]
+ <cand> 6 at token 18463 sub_idx=18 pixel=[250.0,138.0]
+ <cand> 7 at token 18511 sub_idx=18 pixel=[417.0,106.0]
+ <cand> 8 at token 18559 sub_idx=18 pixel=[226.0,177.0]
+ <cand> 9 at token 18607 sub_idx=18 pixel=[399.0,289.0]
+ <cand> 10 at token 18655 sub_idx=18 pixel=[415.0,149.0]
+ <cand> 11 at token 18704 sub_idx=18 pixel=[209.0,271.0]
+ <cand> 12 at token 18751 sub_idx=18 pixel=[428.0,276.0]
+ <cand> 13 at token 18800 sub_idx=18 pixel=[403.0,169.0]
+ <cand> 0 at token 19316 sub_idx=19 pixel=[241.0,242.0]
+ <cand> 1 at token 19364 sub_idx=19 pixel=[270.0,236.0]
+ <cand> 2 at token 19412 sub_idx=19 pixel=[222.0,274.0]
+ <cand> 3 at token 19460 sub_idx=19 pixel=[292.0,243.0]
+ <cand> 4 at token 19508 sub_idx=19 pixel=[258.0,75.0]
+ <cand> 5 at token 19556 sub_idx=19 pixel=[202.0,232.0]
+ <cand> 6 at token 19604 sub_idx=19 pixel=[229.0,89.0]
+ <cand> 7 at token 19652 sub_idx=19 pixel=[244.0,225.0]
+ <cand> 8 at token 19700 sub_idx=19 pixel=[247.0,103.0]
+ <cand> 9 at token 19748 sub_idx=19 pixel=[190.0,276.0]
+ <cand> 0 at token 20261 sub_idx=20 pixel=[131.0,352.0]
+ <cand> 1 at token 20309 sub_idx=20 pixel=[186.0,236.0]
+ <cand> 2 at token 20357 sub_idx=20 pixel=[228.0,118.0]
+ <cand> 3 at token 20405 sub_idx=20 pixel=[283.0,238.0]
+ <cand> 4 at token 20453 sub_idx=20 pixel=[162.0,256.0]
+ <cand> 5 at token 20501 sub_idx=20 pixel=[153.0,340.0]
+ <cand> 6 at token 20549 sub_idx=20 pixel=[170.0,237.0]
+ <cand> 0 at token 21062 sub_idx=21 pixel=[214.0,247.0]
+ <cand> 1 at token 21110 sub_idx=21 pixel=[174.0,242.0]
+ <cand> 2 at token 21158 sub_idx=21 pixel=[221.0,302.0]
+ <cand> 3 at token 21206 sub_idx=21 pixel=[175.0,257.0]
+ <cand> 4 at token 21254 sub_idx=21 pixel=[268.0,238.0]
+ <cand> 5 at token 21302 sub_idx=21 pixel=[294.0,212.0]
+ <cand> 6 at token 21350 sub_idx=21 pixel=[214.0,268.0]
+ <cand> 7 at token 21398 sub_idx=21 pixel=[297.0,236.0]
+ <cand> 8 at token 21446 sub_idx=21 pixel=[234.0,261.0]
+ <cand> 9 at token 21494 sub_idx=21 pixel=[197.0,249.0]
+ <cand> 0 at token 22008 sub_idx=22 pixel=[138.0,189.0]
+ <cand> 1 at token 22056 sub_idx=22 pixel=[106.0,189.0]
+ <cand> 2 at token 22104 sub_idx=22 pixel=[234.0,164.0]
+ <cand> 3 at token 22152 sub_idx=22 pixel=[291.0,224.0]
+ <cand> 4 at token 22200 sub_idx=22 pixel=[107.0,149.0]
+ <cand> 5 at token 22248 sub_idx=22 pixel=[178.0,153.0]
+ <cand> 6 at token 22296 sub_idx=22 pixel=[126.0,200.0]
+ <cand> 7 at token 22344 sub_idx=22 pixel=[128.0,180.0]
+ <cand> 8 at token 22392 sub_idx=22 pixel=[251.0,202.0]
+ <cand> 9 at token 22438 sub_idx=22 pixel=[21.0,273.0]
+ <cand> 10 at token 22486 sub_idx=22 pixel=[83.0,194.0]
+ <cand> 11 at token 22535 sub_idx=22 pixel=[89.0,240.0]
+ <cand> 12 at token 22584 sub_idx=22 pixel=[49.0,253.0]
+ <cand> 13 at token 22633 sub_idx=22 pixel=[203.0,207.0]
+ <cand> 14 at token 22682 sub_idx=22 pixel=[96.0,148.0]
+ <cand> 15 at token 22731 sub_idx=22 pixel=[262.0,153.0]
+ <cand> 16 at token 22780 sub_idx=22 pixel=[86.0,151.0]
+ <cand> 17 at token 22829 sub_idx=22 pixel=[85.0,169.0]
+ <cand> 0 at token 23343 sub_idx=23 pixel=[251.0,113.0]
+ <cand> 1 at token 23391 sub_idx=23 pixel=[221.0,124.0]
+ <cand> 2 at token 23439 sub_idx=23 pixel=[244.0,150.0]
+ <cand> 3 at token 23487 sub_idx=23 pixel=[303.0,214.0]
+ <cand> 0 at token 24000 sub_idx=24 pixel=[266.0,166.0]
+ <cand> 1 at token 24048 sub_idx=24 pixel=[200.0,101.0]
+ <cand> 2 at token 24096 sub_idx=24 pixel=[260.0,121.0]
+ <e_cand> 0 at token 490 sub_idx=0 (same embed)
+ <e_cand> 1 at token 538 sub_idx=0 (same embed)
+ <e_cand> 2 at token 586 sub_idx=0 (same embed)
+ <e_cand> 0 at token 1387 sub_idx=1 (same embed)
+ <e_cand> 1 at token 1435 sub_idx=1 (same embed)
+ <e_cand> 2 at token 1483 sub_idx=1 (same embed)
+ <e_cand> 0 at token 2527 sub_idx=2 (same embed)
+ <e_cand> 1 at token 2575 sub_idx=2 (same embed)
+ <e_cand> 2 at token 2623 sub_idx=2 (same embed)
+ <e_cand> 0 at token 3522 sub_idx=3 (same embed)
+ <e_cand> 1 at token 3570 sub_idx=3 (same embed)
+ <e_cand> 2 at token 3618 sub_idx=3 (same embed)
+ <e_cand> 0 at token 4516 sub_idx=4 (same embed)
+ <e_cand> 1 at token 4564 sub_idx=4 (same embed)
+ <e_cand> 2 at token 4612 sub_idx=4 (same embed)
+ <e_cand> 0 at token 5709 sub_idx=5 (same embed)
+ <e_cand> 1 at token 5757 sub_idx=5 (same embed)
+ <e_cand> 2 at token 5805 sub_idx=5 (same embed)
+ <e_cand> 0 at token 6559 sub_idx=6 (same embed)
+ <e_cand> 1 at token 6607 sub_idx=6 (same embed)
+ <e_cand> 2 at token 6655 sub_idx=6 (same embed)
+ <e_cand> 0 at token 7746 sub_idx=7 (same embed)
+ <e_cand> 1 at token 7794 sub_idx=7 (same embed)
+ <e_cand> 2 at token 7842 sub_idx=7 (same embed)
+ <e_cand> 0 at token 8646 sub_idx=8 (same embed)
+ <e_cand> 1 at token 8694 sub_idx=8 (same embed)
+ <e_cand> 2 at token 8742 sub_idx=8 (same embed)
+ <e_cand> 0 at token 9688 sub_idx=9 (same embed)
+ <e_cand> 1 at token 9736 sub_idx=9 (same embed)
+ <e_cand> 2 at token 9784 sub_idx=9 (same embed)
+ <e_cand> 0 at token 10298 sub_idx=10 (same embed)
+ <e_cand> 1 at token 10346 sub_idx=10 (same embed)
+ <e_cand> 2 at token 10394 sub_idx=10 (same embed)
+ <e_cand> 0 at token 11003 sub_idx=11 (same embed)
+ <e_cand> 1 at token 11051 sub_idx=11 (same embed)
+ <e_cand> 2 at token 11099 sub_idx=11 (same embed)
+ <e_cand> 0 at token 12387 sub_idx=12 (same embed)
+ <e_cand> 1 at token 12435 sub_idx=12 (same embed)
+ <e_cand> 2 at token 12483 sub_idx=12 (same embed)
+ <e_cand> 0 at token 13429 sub_idx=13 (same embed)
+ <e_cand> 1 at token 13477 sub_idx=13 (same embed)
+ <e_cand> 2 at token 13525 sub_idx=13 (same embed)
+ <e_cand> 0 at token 14521 sub_idx=14 (same embed)
+ <e_cand> 1 at token 14569 sub_idx=14 (same embed)
+ <e_cand> 2 at token 14617 sub_idx=14 (same embed)
+ <e_cand> 0 at token 15467 sub_idx=15 (same embed)
+ <e_cand> 1 at token 15515 sub_idx=15 (same embed)
+ <e_cand> 2 at token 15563 sub_idx=15 (same embed)
+ <e_cand> 0 at token 16076 sub_idx=16 (same embed)
+ <e_cand> 1 at token 16124 sub_idx=16 (same embed)
+ <e_cand> 2 at token 16172 sub_idx=16 (same embed)
+ <e_cand> 0 at token 17561 sub_idx=17 (same embed)
+ <e_cand> 1 at token 17609 sub_idx=17 (same embed)
+ <e_cand> 2 at token 17657 sub_idx=17 (same embed)
+ <e_cand> 0 at token 18221 sub_idx=18 (same embed)
+ <e_cand> 1 at token 18269 sub_idx=18 (same embed)
+ <e_cand> 2 at token 18317 sub_idx=18 (same embed)
+ <e_cand> 0 at token 19362 sub_idx=19 (same embed)
+ <e_cand> 1 at token 19410 sub_idx=19 (same embed)
+ <e_cand> 2 at token 19458 sub_idx=19 (same embed)
+ <e_cand> 0 at token 20307 sub_idx=20 (same embed)
+ <e_cand> 1 at token 20355 sub_idx=20 (same embed)
+ <e_cand> 2 at token 20403 sub_idx=20 (same embed)
+ <e_cand> 0 at token 21108 sub_idx=21 (same embed)
+ <e_cand> 1 at token 21156 sub_idx=21 (same embed)
+ <e_cand> 2 at token 21204 sub_idx=21 (same embed)
+ <e_cand> 0 at token 22054 sub_idx=22 (same embed)
+ <e_cand> 1 at token 22102 sub_idx=22 (same embed)
+ <e_cand> 2 at token 22150 sub_idx=22 (same embed)
+ <e_cand> 0 at token 23389 sub_idx=23 (same embed)
+ <e_cand> 1 at token 23437 sub_idx=23 (same embed)
+ <e_cand> 2 at token 23485 sub_idx=23 (same embed)
+ <e_cand> 0 at token 24046 sub_idx=24 (same embed)
+ <e_cand> 1 at token 24094 sub_idx=24 (same embed)
+ <e_cand> 2 at token 24142 sub_idx=24 (same embed)
+
+ [2026-02-25 06:18:02,804] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 2.25 | optimizer_gradients: 5.18 | optimizer_step: 3.28
+ [2026-02-25 06:18:02,815] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6729.83 | bwd_microstep: 12622.88 | bwd_inner_microstep: 6539.62 | bwd_allreduce_microstep: 6083.17 | step_microstep: 47.45
+ [2026-02-25 06:18:02,816] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6729.83 | bwd: 12622.88 | bwd_inner: 6539.61 | bwd_allreduce: 6083.19 | step: 48.26
+ {'loss': 1.6327, 'learning_rate': 6.060606060606061e-07, 'epoch': 0.0}
+ 02/25/2026 06:18:09 - INFO - internvl_cleaned.model.internvl_chat.modeling_internvl_chat - [Text Position Embedding Verification] Sample 3:
+ <s> at token 437 sub_idx=0
+ <s> at token 1145 sub_idx=1
+ <s> at token 2237 sub_idx=2
+ <s> at token 3038 sub_idx=3
+ <s> at token 3888 sub_idx=4
+ <s> at token 5374 sub_idx=5
+ <s> at token 6370 sub_idx=6
+ <s> at token 7411 sub_idx=7
+ <s> at token 8261 sub_idx=8
+ <s> at token 9254 sub_idx=9
+ <s> at token 10887 sub_idx=10
+ <s> at token 12126 sub_idx=11
+ <s> at token 13072 sub_idx=12
+ <s> at token 13967 sub_idx=13
+ <s> at token 14913 sub_idx=14
+ <s> at token 15811 sub_idx=15
+ <s> at token 16658 sub_idx=16
+ <s> at token 17459 sub_idx=17
+ <s> at token 18500 sub_idx=18
+ <s> at token 19492 sub_idx=19
+ <s> at token 20295 sub_idx=20
+ <s> at token 21049 sub_idx=21
+ <s> at token 21803 sub_idx=22
+ <s> at token 22750 sub_idx=23
+ <s> at token 23598 sub_idx=24
+ <s> at token 24448 sub_idx=25
+ <e_s> at token 439 sub_idx=0
+ <e_s> at token 1147 sub_idx=1
+ <e_s> at token 2239 sub_idx=2
+ <e_s> at token 3040 sub_idx=3
+ <e_s> at token 3890 sub_idx=4
+ <e_s> at token 5376 sub_idx=5
+ <e_s> at token 6372 sub_idx=6
+ <e_s> at token 7413 sub_idx=7
+ <e_s> at token 8263 sub_idx=8
+ <e_s> at token 9256 sub_idx=9
+ <e_s> at token 10889 sub_idx=10
+ <e_s> at token 12128 sub_idx=11
+ <e_s> at token 13074 sub_idx=12
+ <e_s> at token 13969 sub_idx=13
+ <e_s> at token 14915 sub_idx=14
+ <e_s> at token 15813 sub_idx=15
+ <e_s> at token 16660 sub_idx=16
+ <e_s> at token 17461 sub_idx=17
+ <e_s> at token 18502 sub_idx=18
+ <e_s> at token 19494 sub_idx=19
+ <e_s> at token 20297 sub_idx=20
+ <e_s> at token 21051 sub_idx=21
+ <e_s> at token 21805 sub_idx=22
+ <e_s> at token 22752 sub_idx=23
+ <e_s> at token 23600 sub_idx=24
+ <e_s> at token 24450 sub_idx=25
+ <cand> 0 at token 444 sub_idx=0 pixel=[229.0,302.0]
+ <cand> 1 at token 492 sub_idx=0 pixel=[152.0,186.0]
+ <cand> 2 at token 540 sub_idx=0 pixel=[122.0,189.0]
+ <cand> 3 at token 588 sub_idx=0 pixel=[155.0,209.0]
+ <cand> 4 at token 636 sub_idx=0 pixel=[189.0,226.0]
+ <cand> 0 at token 1152 sub_idx=1 pixel=[170.0,104.0]
+ <cand> 1 at token 1200 sub_idx=1 pixel=[192.0,94.0]
+ <cand> 2 at token 1248 sub_idx=1 pixel=[181.0,77.0]
+ <cand> 3 at token 1296 sub_idx=1 pixel=[248.0,199.0]
+ <cand> 4 at token 1344 sub_idx=1 pixel=[243.0,272.0]
+ <cand> 5 at token 1392 sub_idx=1 pixel=[288.0,95.0]
+ <cand> 6 at token 1440 sub_idx=1 pixel=[242.0,239.0]
+ <cand> 7 at token 1488 sub_idx=1 pixel=[186.0,256.0]
+ <cand> 8 at token 1536 sub_idx=1 pixel=[272.0,134.0]
+ <cand> 9 at token 1584 sub_idx=1 pixel=[165.0,127.0]
863
+ <cand> 10 at token 1632 sub_idx=1 pixel=[214.0,59.0]
864
+ <cand> 11 at token 1681 sub_idx=1 pixel=[186.0,272.0]
865
+ <cand> 12 at token 1728 sub_idx=1 pixel=[148.0,129.0]
866
+ <cand> 0 at token 2244 sub_idx=2 pixel=[310.0,277.0]
867
+ <cand> 1 at token 2292 sub_idx=2 pixel=[256.0,155.0]
868
+ <cand> 2 at token 2340 sub_idx=2 pixel=[293.0,278.0]
869
+ <cand> 3 at token 2388 sub_idx=2 pixel=[304.0,258.0]
870
+ <cand> 4 at token 2436 sub_idx=2 pixel=[194.0,257.0]
871
+ <cand> 5 at token 2484 sub_idx=2 pixel=[198.0,186.0]
872
+ <cand> 6 at token 2532 sub_idx=2 pixel=[243.0,165.0]
873
+ <cand> 0 at token 3045 sub_idx=3 pixel=[313.0,271.0]
874
+ <cand> 1 at token 3093 sub_idx=3 pixel=[304.0,294.0]
875
+ <cand> 2 at token 3141 sub_idx=3 pixel=[287.0,231.0]
876
+ <cand> 3 at token 3189 sub_idx=3 pixel=[274.0,104.0]
877
+ <cand> 4 at token 3237 sub_idx=3 pixel=[116.0,235.0]
878
+ <cand> 5 at token 3285 sub_idx=3 pixel=[269.0,245.0]
879
+ <cand> 6 at token 3333 sub_idx=3 pixel=[161.0,212.0]
880
+ <cand> 7 at token 3381 sub_idx=3 pixel=[195.0,247.0]
881
+ <cand> 0 at token 3895 sub_idx=4 pixel=[89.0,206.0]
882
+ <cand> 1 at token 3943 sub_idx=4 pixel=[303.0,296.0]
883
+ <cand> 2 at token 3991 sub_idx=4 pixel=[258.0,208.0]
884
+ <cand> 3 at token 4039 sub_idx=4 pixel=[111.0,226.0]
885
+ <cand> 4 at token 4087 sub_idx=4 pixel=[123.0,269.0]
886
+ <cand> 5 at token 4135 sub_idx=4 pixel=[168.0,277.0]
887
+ <cand> 6 at token 4183 sub_idx=4 pixel=[229.0,266.0]
888
+ <cand> 7 at token 4231 sub_idx=4 pixel=[102.0,248.0]
889
+ <cand> 8 at token 4279 sub_idx=4 pixel=[279.0,378.0]
890
+ <cand> 9 at token 4327 sub_idx=4 pixel=[230.0,369.0]
891
+ <cand> 10 at token 4375 sub_idx=4 pixel=[249.0,349.0]
892
+ <cand> 11 at token 4424 sub_idx=4 pixel=[249.0,128.0]
893
+ <cand> 12 at token 4473 sub_idx=4 pixel=[260.0,284.0]
894
+ <cand> 13 at token 4522 sub_idx=4 pixel=[241.0,109.0]
895
+ <cand> 14 at token 4571 sub_idx=4 pixel=[140.0,243.0]
896
+ <cand> 15 at token 4620 sub_idx=4 pixel=[299.0,272.0]
897
+ <cand> 16 at token 4669 sub_idx=4 pixel=[112.0,187.0]
898
+ <cand> 17 at token 4718 sub_idx=4 pixel=[161.0,346.0]
899
+ <cand> 18 at token 4767 sub_idx=4 pixel=[173.0,210.0]
900
+ <cand> 19 at token 4816 sub_idx=4 pixel=[122.0,345.0]
901
+ <cand> 20 at token 4865 sub_idx=4 pixel=[257.0,355.0]
902
+ <cand> 0 at token 5381 sub_idx=5 pixel=[193.0,254.0]
903
+ <cand> 1 at token 5429 sub_idx=5 pixel=[201.0,319.0]
904
+ <cand> 2 at token 5477 sub_idx=5 pixel=[135.0,302.0]
905
+ <cand> 3 at token 5525 sub_idx=5 pixel=[200.0,241.0]
906
+ <cand> 4 at token 5573 sub_idx=5 pixel=[221.0,294.0]
907
+ <cand> 5 at token 5621 sub_idx=5 pixel=[187.0,272.0]
908
+ <cand> 6 at token 5669 sub_idx=5 pixel=[257.0,313.0]
909
+ <cand> 7 at token 5717 sub_idx=5 pixel=[237.0,250.0]
910
+ <cand> 8 at token 5765 sub_idx=5 pixel=[152.0,103.0]
911
+ <cand> 9 at token 5813 sub_idx=5 pixel=[246.0,281.0]
912
+ <cand> 10 at token 5861 sub_idx=5 pixel=[204.0,330.0]
913
+ <cand> 0 at token 6377 sub_idx=6 pixel=[119.0,307.0]
914
+ <cand> 1 at token 6425 sub_idx=6 pixel=[242.0,189.0]
915
+ <cand> 2 at token 6473 sub_idx=6 pixel=[272.0,272.0]
916
+ <cand> 3 at token 6521 sub_idx=6 pixel=[216.0,188.0]
917
+ <cand> 4 at token 6567 sub_idx=6 pixel=[110.0,294.0]
918
+ <cand> 5 at token 6615 sub_idx=6 pixel=[307.0,290.0]
919
+ <cand> 6 at token 6663 sub_idx=6 pixel=[120.0,326.0]
920
+ <cand> 7 at token 6711 sub_idx=6 pixel=[320.0,310.0]
921
+ <cand> 8 at token 6759 sub_idx=6 pixel=[231.0,183.0]
922
+ <cand> 9 at token 6807 sub_idx=6 pixel=[173.0,176.0]
923
+ <cand> 10 at token 6855 sub_idx=6 pixel=[207.0,203.0]
924
+ <cand> 11 at token 6904 sub_idx=6 pixel=[272.0,321.0]
925
+ <cand> 0 at token 7418 sub_idx=7 pixel=[246.0,234.0]
926
+ <cand> 1 at token 7464 sub_idx=7 pixel=[229.0,295.0]
927
+ <cand> 2 at token 7512 sub_idx=7 pixel=[176.0,318.0]
928
+ <cand> 3 at token 7560 sub_idx=7 pixel=[193.0,203.0]
929
+ <cand> 4 at token 7608 sub_idx=7 pixel=[135.0,197.0]
930
+ <cand> 5 at token 7656 sub_idx=7 pixel=[237.0,65.0]
931
+ <cand> 6 at token 7704 sub_idx=7 pixel=[253.0,249.0]
932
+ <cand> 7 at token 7752 sub_idx=7 pixel=[243.0,256.0]
933
+ <cand> 0 at token 8268 sub_idx=8 pixel=[193.0,211.0]
934
+ <cand> 1 at token 8316 sub_idx=8 pixel=[299.0,179.0]
935
+ <cand> 2 at token 8364 sub_idx=8 pixel=[323.0,190.0]
936
+ <cand> 3 at token 8412 sub_idx=8 pixel=[206.0,270.0]
937
+ <cand> 4 at token 8458 sub_idx=8 pixel=[202.0,88.0]
938
+ <cand> 5 at token 8506 sub_idx=8 pixel=[254.0,142.0]
939
+ <cand> 6 at token 8554 sub_idx=8 pixel=[360.0,198.0]
940
+ <cand> 7 at token 8602 sub_idx=8 pixel=[222.0,125.0]
941
+ <cand> 8 at token 8650 sub_idx=8 pixel=[219.0,97.0]
942
+ <cand> 9 at token 8698 sub_idx=8 pixel=[349.0,215.0]
943
+ <cand> 10 at token 8746 sub_idx=8 pixel=[308.0,112.0]
944
+ <cand> 0 at token 9261 sub_idx=9 pixel=[155.0,287.0]
945
+ <cand> 1 at token 9309 sub_idx=9 pixel=[195.0,291.0]
946
+ <cand> 2 at token 9357 sub_idx=9 pixel=[164.0,298.0]
947
+ <cand> 3 at token 9405 sub_idx=9 pixel=[257.0,123.0]
948
+ <cand> 4 at token 9453 sub_idx=9 pixel=[268.0,152.0]
949
+ <cand> 5 at token 9501 sub_idx=9 pixel=[301.0,179.0]
950
+ <cand> 6 at token 9549 sub_idx=9 pixel=[348.0,323.0]
951
+ <cand> 7 at token 9597 sub_idx=9 pixel=[192.0,128.0]
952
+ <cand> 8 at token 9645 sub_idx=9 pixel=[230.0,285.0]
953
+ <cand> 9 at token 9693 sub_idx=9 pixel=[322.0,200.0]
954
+ <cand> 10 at token 9741 sub_idx=9 pixel=[229.0,243.0]
955
+ <cand> 11 at token 9788 sub_idx=9 pixel=[376.0,261.0]
956
+ <cand> 12 at token 9837 sub_idx=9 pixel=[302.0,195.0]
957
+ <cand> 13 at token 9886 sub_idx=9 pixel=[274.0,368.0]
958
+ <cand> 14 at token 9935 sub_idx=9 pixel=[372.0,364.0]
959
+ <cand> 15 at token 9984 sub_idx=9 pixel=[342.0,173.0]
960
+ <cand> 16 at token 10033 sub_idx=9 pixel=[311.0,303.0]
961
+ <cand> 17 at token 10082 sub_idx=9 pixel=[376.0,344.0]
962
+ <cand> 18 at token 10131 sub_idx=9 pixel=[222.0,144.0]
963
+ <cand> 19 at token 10180 sub_idx=9 pixel=[394.0,290.0]
964
+ <cand> 20 at token 10229 sub_idx=9 pixel=[212.0,252.0]
965
+ <cand> 21 at token 10278 sub_idx=9 pixel=[330.0,349.0]
966
+ <cand> 22 at token 10327 sub_idx=9 pixel=[239.0,143.0]
967
+ <cand> 23 at token 10376 sub_idx=9 pixel=[344.0,300.0]
968
+ <cand> 0 at token 10894 sub_idx=10 pixel=[297.0,230.0]
969
+ <cand> 1 at token 10942 sub_idx=10 pixel=[215.0,329.0]
970
+ <cand> 2 at token 10990 sub_idx=10 pixel=[176.0,245.0]
971
+ <cand> 3 at token 11038 sub_idx=10 pixel=[209.0,170.0]
972
+ <cand> 4 at token 11086 sub_idx=10 pixel=[322.0,354.0]
973
+ <cand> 5 at token 11134 sub_idx=10 pixel=[156.0,177.0]
974
+ <cand> 6 at token 11182 sub_idx=10 pixel=[280.0,202.0]
975
+ <cand> 7 at token 11230 sub_idx=10 pixel=[136.0,247.0]
976
+ <cand> 8 at token 11278 sub_idx=10 pixel=[375.0,363.0]
977
+ <cand> 9 at token 11326 sub_idx=10 pixel=[262.0,159.0]
978
+ <cand> 10 at token 11374 sub_idx=10 pixel=[282.0,121.0]
979
+ <cand> 11 at token 11423 sub_idx=10 pixel=[166.0,196.0]
980
+ <cand> 12 at token 11472 sub_idx=10 pixel=[296.0,109.0]
981
+ <cand> 13 at token 11521 sub_idx=10 pixel=[292.0,98.0]
982
+ <cand> 14 at token 11570 sub_idx=10 pixel=[154.0,218.0]
983
+ <cand> 15 at token 11619 sub_idx=10 pixel=[217.0,278.0]
984
+ <cand> 0 at token 12133 sub_idx=11 pixel=[257.0,140.0]
985
+ <cand> 1 at token 12181 sub_idx=11 pixel=[181.0,185.0]
986
+ <cand> 2 at token 12229 sub_idx=11 pixel=[287.0,274.0]
987
+ <cand> 3 at token 12277 sub_idx=11 pixel=[249.0,204.0]
988
+ <cand> 4 at token 12325 sub_idx=11 pixel=[230.0,152.0]
989
+ <cand> 5 at token 12373 sub_idx=11 pixel=[250.0,134.0]
990
+ <cand> 6 at token 12421 sub_idx=11 pixel=[288.0,222.0]
991
+ <cand> 7 at token 12469 sub_idx=11 pixel=[124.0,273.0]
992
+ <cand> 8 at token 12517 sub_idx=11 pixel=[346.0,232.0]
993
+ <cand> 9 at token 12565 sub_idx=11 pixel=[344.0,264.0]
994
+ <cand> 0 at token 13079 sub_idx=12 pixel=[67.0,162.0]
995
+ <cand> 1 at token 13127 sub_idx=12 pixel=[109.0,224.0]
996
+ <cand> 2 at token 13175 sub_idx=12 pixel=[231.0,230.0]
997
+ <cand> 3 at token 13221 sub_idx=12 pixel=[151.0,174.0]
998
+ <cand> 4 at token 13269 sub_idx=12 pixel=[73.0,164.0]
999
+ <cand> 5 at token 13317 sub_idx=12 pixel=[203.0,215.0]
1000
+ <cand> 6 at token 13365 sub_idx=12 pixel=[89.0,165.0]
1001
+ <cand> 7 at token 13413 sub_idx=12 pixel=[108.0,193.0]
1002
+ <cand> 8 at token 13461 sub_idx=12 pixel=[135.0,230.0]
1003
+ <cand> 0 at token 13974 sub_idx=13 pixel=[247.0,149.0]
1004
+ <cand> 1 at token 14022 sub_idx=13 pixel=[265.0,271.0]
1005
+ <cand> 2 at token 14070 sub_idx=13 pixel=[152.0,198.0]
1006
+ <cand> 3 at token 14118 sub_idx=13 pixel=[182.0,202.0]
1007
+ <cand> 4 at token 14166 sub_idx=13 pixel=[268.0,310.0]
1008
+ <cand> 5 at token 14214 sub_idx=13 pixel=[237.0,209.0]
1009
+ <cand> 6 at token 14262 sub_idx=13 pixel=[244.0,107.0]
1010
+ <cand> 7 at token 14310 sub_idx=13 pixel=[198.0,223.0]
1011
+ <cand> 8 at token 14358 sub_idx=13 pixel=[238.0,242.0]
1012
+ <cand> 9 at token 14406 sub_idx=13 pixel=[220.0,124.0]
1013
+ <cand> 0 at token 14920 sub_idx=14 pixel=[146.0,258.0]
1014
+ <cand> 1 at token 14968 sub_idx=14 pixel=[239.0,149.0]
1015
+ <cand> 2 at token 15016 sub_idx=14 pixel=[145.0,290.0]
1016
+ <cand> 3 at token 15064 sub_idx=14 pixel=[289.0,247.0]
1017
+ <cand> 4 at token 15112 sub_idx=14 pixel=[132.0,108.0]
1018
+ <cand> 5 at token 15160 sub_idx=14 pixel=[267.0,221.0]
1019
+ <cand> 6 at token 15208 sub_idx=14 pixel=[302.0,234.0]
1020
+ <cand> 7 at token 15256 sub_idx=14 pixel=[239.0,204.0]
1021
+ <cand> 8 at token 15304 sub_idx=14 pixel=[166.0,205.0]
1022
+ <cand> 0 at token 15818 sub_idx=15 pixel=[232.0,165.0]
1023
+ <cand> 1 at token 15866 sub_idx=15 pixel=[208.0,176.0]
1024
+ <cand> 2 at token 15914 sub_idx=15 pixel=[230.0,142.0]
1025
+ <cand> 3 at token 15962 sub_idx=15 pixel=[211.0,143.0]
1026
+ <cand> 4 at token 16010 sub_idx=15 pixel=[227.0,131.0]
1027
+ <cand> 5 at token 16056 sub_idx=15 pixel=[222.0,265.0]
1028
+ <cand> 6 at token 16104 sub_idx=15 pixel=[194.0,268.0]
1029
+ <cand> 7 at token 16152 sub_idx=15 pixel=[240.0,216.0]
1030
+ <cand> 0 at token 16665 sub_idx=16 pixel=[254.0,190.0]
1031
+ <cand> 1 at token 16713 sub_idx=16 pixel=[228.0,272.0]
1032
+ <cand> 2 at token 16761 sub_idx=16 pixel=[229.0,191.0]
1033
+ <cand> 3 at token 16809 sub_idx=16 pixel=[263.0,264.0]
1034
+ <cand> 4 at token 16857 sub_idx=16 pixel=[221.0,318.0]
1035
+ <cand> 5 at token 16905 sub_idx=16 pixel=[241.0,314.0]
1036
+ <cand> 6 at token 16953 sub_idx=16 pixel=[261.0,283.0]
1037
+ <cand> 0 at token 17466 sub_idx=17 pixel=[219.0,186.0]
1038
+ <cand> 1 at token 17514 sub_idx=17 pixel=[212.0,228.0]
1039
+ <cand> 2 at token 17560 sub_idx=17 pixel=[197.0,98.0]
1040
+ <cand> 3 at token 17608 sub_idx=17 pixel=[293.0,185.0]
1041
+ <cand> 4 at token 17656 sub_idx=17 pixel=[299.0,121.0]
1042
+ <cand> 5 at token 17704 sub_idx=17 pixel=[208.0,270.0]
1043
+ <cand> 6 at token 17752 sub_idx=17 pixel=[355.0,202.0]
1044
+ <cand> 7 at token 17800 sub_idx=17 pixel=[218.0,137.0]
1045
+ <cand> 8 at token 17848 sub_idx=17 pixel=[199.0,234.0]
1046
+ <cand> 9 at token 17896 sub_idx=17 pixel=[347.0,218.0]
1047
+ <cand> 10 at token 17944 sub_idx=17 pixel=[213.0,107.0]
1048
+ <cand> 11 at token 17993 sub_idx=17 pixel=[254.0,143.0]
1049
+ <cand> 0 at token 18507 sub_idx=18 pixel=[182.0,261.0]
1050
+ <cand> 1 at token 18555 sub_idx=18 pixel=[147.0,252.0]
1051
+ <cand> 2 at token 18603 sub_idx=18 pixel=[174.0,216.0]
1052
+ <cand> 3 at token 18651 sub_idx=18 pixel=[191.0,237.0]
1053
+ <cand> 4 at token 18699 sub_idx=18 pixel=[147.0,215.0]
1054
+ <cand> 5 at token 18747 sub_idx=18 pixel=[269.0,230.0]
1055
+ <cand> 6 at token 18793 sub_idx=18 pixel=[278.0,201.0]
1056
+ <cand> 7 at token 18841 sub_idx=18 pixel=[281.0,227.0]
1057
+ <cand> 8 at token 18889 sub_idx=18 pixel=[173.0,268.0]
1058
+ <cand> 9 at token 18937 sub_idx=18 pixel=[252.0,230.0]
1059
+ <cand> 10 at token 18985 sub_idx=18 pixel=[186.0,212.0]
1060
+ <cand> 0 at token 19499 sub_idx=19 pixel=[185.0,30.0]
1061
+ <cand> 1 at token 19547 sub_idx=19 pixel=[170.0,89.0]
1062
+ <cand> 2 at token 19595 sub_idx=19 pixel=[176.0,303.0]
1063
+ <cand> 3 at token 19643 sub_idx=19 pixel=[185.0,244.0]
1064
+ <cand> 4 at token 19691 sub_idx=19 pixel=[272.0,184.0]
1065
+ <cand> 5 at token 19739 sub_idx=19 pixel=[155.0,133.0]
1066
+ <cand> 6 at token 19787 sub_idx=19 pixel=[258.0,102.0]
1067
+ <cand> 0 at token 20302 sub_idx=20 pixel=[288.0,265.0]
1068
+ <cand> 1 at token 20350 sub_idx=20 pixel=[279.0,208.0]
1069
+ <cand> 2 at token 20398 sub_idx=20 pixel=[228.0,257.0]
1070
+ <cand> 3 at token 20446 sub_idx=20 pixel=[218.0,120.0]
1071
+ <cand> 4 at token 20494 sub_idx=20 pixel=[310.0,285.0]
1072
+ <cand> 5 at token 20542 sub_idx=20 pixel=[264.0,292.0]
1073
+ <cand> 0 at token 21056 sub_idx=21 pixel=[185.0,105.0]
1074
+ <cand> 1 at token 21104 sub_idx=21 pixel=[153.0,152.0]
1075
+ <cand> 2 at token 21152 sub_idx=21 pixel=[124.0,194.0]
1076
+ <cand> 3 at token 21200 sub_idx=21 pixel=[130.0,188.0]
1077
+ <cand> 4 at token 21248 sub_idx=21 pixel=[236.0,263.0]
1078
+ <cand> 5 at token 21296 sub_idx=21 pixel=[168.0,195.0]
1079
+ <cand> 0 at token 21810 sub_idx=22 pixel=[281.0,273.0]
1080
+ <cand> 1 at token 21858 sub_idx=22 pixel=[164.0,266.0]
1081
+ <cand> 2 at token 21906 sub_idx=22 pixel=[225.0,165.0]
1082
+ <cand> 3 at token 21954 sub_idx=22 pixel=[199.0,132.0]
1083
+ <cand> 4 at token 22002 sub_idx=22 pixel=[194.0,199.0]
1084
+ <cand> 5 at token 22050 sub_idx=22 pixel=[139.0,173.0]
1085
+ <cand> 6 at token 22098 sub_idx=22 pixel=[149.0,190.0]
1086
+ <cand> 7 at token 22146 sub_idx=22 pixel=[262.0,291.0]
1087
+ <cand> 8 at token 22194 sub_idx=22 pixel=[189.0,155.0]
1088
+ <cand> 9 at token 22242 sub_idx=22 pixel=[294.0,295.0]
1089
+ <cand> 0 at token 22757 sub_idx=23 pixel=[246.0,171.0]
1090
+ <cand> 1 at token 22805 sub_idx=23 pixel=[247.0,101.0]
1091
+ <cand> 2 at token 22853 sub_idx=23 pixel=[236.0,329.0]
1092
+ <cand> 3 at token 22901 sub_idx=23 pixel=[288.0,203.0]
1093
+ <cand> 4 at token 22949 sub_idx=23 pixel=[255.0,12.0]
1094
+ <cand> 5 at token 22997 sub_idx=23 pixel=[282.0,81.0]
1095
+ <cand> 6 at token 23045 sub_idx=23 pixel=[271.0,100.0]
1096
+ <cand> 7 at token 23093 sub_idx=23 pixel=[205.0,246.0]
1097
+ <cand> 0 at token 23605 sub_idx=24 pixel=[296.0,266.0]
1098
+ <cand> 1 at token 23653 sub_idx=24 pixel=[283.0,192.0]
1099
+ <cand> 2 at token 23701 sub_idx=24 pixel=[296.0,78.0]
1100
+ <cand> 3 at token 23749 sub_idx=24 pixel=[344.0,184.0]
1101
+ <cand> 4 at token 23797 sub_idx=24 pixel=[359.0,136.0]
1102
+ <cand> 5 at token 23845 sub_idx=24 pixel=[242.0,146.0]
1103
+ <cand> 6 at token 23893 sub_idx=24 pixel=[316.0,302.0]
1104
+ <cand> 7 at token 23941 sub_idx=24 pixel=[270.0,103.0]
1105
+ <cand> 0 at token 24455 sub_idx=25 pixel=[267.0,303.0]
1106
+ <cand> 1 at token 24503 sub_idx=25 pixel=[199.0,248.0]
1107
+ <e_cand> 0 at token 490 sub_idx=0 (same embed)
1108
+ <e_cand> 1 at token 538 sub_idx=0 (same embed)
1109
+ <e_cand> 2 at token 586 sub_idx=0 (same embed)
1110
+ <e_cand> 0 at token 1198 sub_idx=1 (same embed)
1111
+ <e_cand> 1 at token 1246 sub_idx=1 (same embed)
1112
+ <e_cand> 2 at token 1294 sub_idx=1 (same embed)
1113
+ <e_cand> 0 at token 2290 sub_idx=2 (same embed)
1114
+ <e_cand> 1 at token 2338 sub_idx=2 (same embed)
1115
+ <e_cand> 2 at token 2386 sub_idx=2 (same embed)
1116
+ <e_cand> 0 at token 3091 sub_idx=3 (same embed)
1117
+ <e_cand> 1 at token 3139 sub_idx=3 (same embed)
1118
+ <e_cand> 2 at token 3187 sub_idx=3 (same embed)
1119
+ <e_cand> 0 at token 3941 sub_idx=4 (same embed)
1120
+ <e_cand> 1 at token 3989 sub_idx=4 (same embed)
1121
+ <e_cand> 2 at token 4037 sub_idx=4 (same embed)
1122
+ <e_cand> 0 at token 5427 sub_idx=5 (same embed)
1123
+ <e_cand> 1 at token 5475 sub_idx=5 (same embed)
1124
+ <e_cand> 2 at token 5523 sub_idx=5 (same embed)
1125
+ <e_cand> 0 at token 6423 sub_idx=6 (same embed)
1126
+ <e_cand> 1 at token 6471 sub_idx=6 (same embed)
1127
+ <e_cand> 2 at token 6519 sub_idx=6 (same embed)
1128
+ <e_cand> 0 at token 7462 sub_idx=7 (same embed)
1129
+ <e_cand> 1 at token 7510 sub_idx=7 (same embed)
1130
+ <e_cand> 2 at token 7558 sub_idx=7 (same embed)
1131
+ <e_cand> 0 at token 8314 sub_idx=8 (same embed)
1132
+ <e_cand> 1 at token 8362 sub_idx=8 (same embed)
1133
+ <e_cand> 2 at token 8410 sub_idx=8 (same embed)
1134
+ <e_cand> 0 at token 9307 sub_idx=9 (same embed)
1135
+ <e_cand> 1 at token 9355 sub_idx=9 (same embed)
1136
+ <e_cand> 2 at token 9403 sub_idx=9 (same embed)
1137
+ <e_cand> 0 at token 10940 sub_idx=10 (same embed)
1138
+ <e_cand> 1 at token 10988 sub_idx=10 (same embed)
1139
+ <e_cand> 2 at token 11036 sub_idx=10 (same embed)
1140
+ <e_cand> 0 at token 12179 sub_idx=11 (same embed)
1141
+ <e_cand> 1 at token 12227 sub_idx=11 (same embed)
1142
+ <e_cand> 2 at token 12275 sub_idx=11 (same embed)
1143
+ <e_cand> 0 at token 13125 sub_idx=12 (same embed)
1144
+ <e_cand> 1 at token 13173 sub_idx=12 (same embed)
1145
+ <e_cand> 2 at token 13219 sub_idx=12 (same embed)
1146
+ <e_cand> 0 at token 14020 sub_idx=13 (same embed)
1147
+ <e_cand> 1 at token 14068 sub_idx=13 (same embed)
1148
+ <e_cand> 2 at token 14116 sub_idx=13 (same embed)
1149
+ <e_cand> 0 at token 14966 sub_idx=14 (same embed)
1150
+ <e_cand> 1 at token 15014 sub_idx=14 (same embed)
1151
+ <e_cand> 2 at token 15062 sub_idx=14 (same embed)
1152
+ <e_cand> 0 at token 15864 sub_idx=15 (same embed)
1153
+ <e_cand> 1 at token 15912 sub_idx=15 (same embed)
1154
+ <e_cand> 2 at token 15960 sub_idx=15 (same embed)
1155
+ <e_cand> 0 at token 16711 sub_idx=16 (same embed)
1156
+ <e_cand> 1 at token 16759 sub_idx=16 (same embed)
1157
+ <e_cand> 2 at token 16807 sub_idx=16 (same embed)
1158
+ <e_cand> 0 at token 17512 sub_idx=17 (same embed)
1159
+ <e_cand> 1 at token 17558 sub_idx=17 (same embed)
1160
+ <e_cand> 2 at token 17606 sub_idx=17 (same embed)
1161
+ <e_cand> 0 at token 18553 sub_idx=18 (same embed)
1162
+ <e_cand> 1 at token 18601 sub_idx=18 (same embed)
1163
+ <e_cand> 2 at token 18649 sub_idx=18 (same embed)
1164
+ <e_cand> 0 at token 19545 sub_idx=19 (same embed)
1165
+ <e_cand> 1 at token 19593 sub_idx=19 (same embed)
1166
+ <e_cand> 2 at token 19641 sub_idx=19 (same embed)
1167
+ <e_cand> 0 at token 20348 sub_idx=20 (same embed)
1168
+ <e_cand> 1 at token 20396 sub_idx=20 (same embed)
1169
+ <e_cand> 2 at token 20444 sub_idx=20 (same embed)
1170
+ <e_cand> 0 at token 21102 sub_idx=21 (same embed)
1171
+ <e_cand> 1 at token 21150 sub_idx=21 (same embed)
1172
+ <e_cand> 2 at token 21198 sub_idx=21 (same embed)
1173
+ <e_cand> 0 at token 21856 sub_idx=22 (same embed)
1174
+ <e_cand> 1 at token 21904 sub_idx=22 (same embed)
1175
+ <e_cand> 2 at token 21952 sub_idx=22 (same embed)
1176
+ <e_cand> 0 at token 22803 sub_idx=23 (same embed)
1177
+ <e_cand> 1 at token 22851 sub_idx=23 (same embed)
1178
+ <e_cand> 2 at token 22899 sub_idx=23 (same embed)
1179
+ <e_cand> 0 at token 23651 sub_idx=24 (same embed)
1180
+ <e_cand> 1 at token 23699 sub_idx=24 (same embed)
1181
+ <e_cand> 2 at token 23747 sub_idx=24 (same embed)
1182
+ <e_cand> 0 at token 24501 sub_idx=25 (same embed)
1183
+ <e_cand> 1 at token 24549 sub_idx=25 (same embed)
1184
+
1185
+ [2026-02-25 06:18:21,881] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.45 | optimizer_gradients: 1.44 | optimizer_step: 0.87
1186
+ [2026-02-25 06:18:21,883] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 7101.76 | bwd_microstep: 10975.31 | bwd_inner_microstep: 8633.30 | bwd_allreduce_microstep: 2341.92 | step_microstep: 91.95
1187
+ [2026-02-25 06:18:21,884] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 7101.77 | bwd: 10975.31 | bwd_inner: 8633.29 | bwd_allreduce: 2341.94 | step: 91.96
1188
+ {'loss': 1.5964, 'learning_rate': 9.09090909090909e-07, 'epoch': 0.0}
1189
+ [2026-02-25 06:18:39,023] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.56 | optimizer_gradients: 2.00 | optimizer_step: 0.95
1190
+ [2026-02-25 06:18:39,025] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 8051.87 | bwd_microstep: 7784.85 | bwd_inner_microstep: 6309.51 | bwd_allreduce_microstep: 1475.23 | step_microstep: 593.61
1191
+ [2026-02-25 06:18:39,026] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 8051.89 | bwd: 7784.86 | bwd_inner: 6309.50 | bwd_allreduce: 1475.26 | step: 593.61
1192
+ {'loss': 1.659, 'learning_rate': 1.2121212121212122e-06, 'epoch': 0.0}
1193
+ [2026-02-25 06:18:52,805] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.19 | optimizer_gradients: 1.10 | optimizer_step: 0.81
1194
+ [2026-02-25 06:18:52,806] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6282.63 | bwd_microstep: 6950.30 | bwd_inner_microstep: 5828.60 | bwd_allreduce_microstep: 1121.60 | step_microstep: 18.40
1195
+ [2026-02-25 06:18:52,806] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6282.64 | bwd: 6950.29 | bwd_inner: 5828.59 | bwd_allreduce: 1121.62 | step: 18.40
1196
+ {'loss': 1.5848, 'learning_rate': 1.5151515151515152e-06, 'epoch': 0.0}
1197
+ [2026-02-25 06:19:05,866] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.29 | optimizer_gradients: 1.11 | optimizer_step: 0.81
1198
+ [2026-02-25 06:19:05,867] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6410.25 | bwd_microstep: 6271.65 | bwd_inner_microstep: 6195.07 | bwd_allreduce_microstep: 76.49 | step_microstep: 18.83
1199
+ [2026-02-25 06:19:05,867] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6410.27 | bwd: 6271.65 | bwd_inner: 6195.06 | bwd_allreduce: 76.51 | step: 18.84
1200
+ {'loss': 1.6153, 'learning_rate': 1.818181818181818e-06, 'epoch': 0.0}
1201
+ [2026-02-25 06:19:18,839] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.14 | optimizer_gradients: 1.05 | optimizer_step: 0.82
1202
+ [2026-02-25 06:19:18,840] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6342.45 | bwd_microstep: 6246.72 | bwd_inner_microstep: 6242.33 | bwd_allreduce_microstep: 4.30 | step_microstep: 18.15
1203
+ [2026-02-25 06:19:18,840] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6342.46 | bwd: 6246.72 | bwd_inner: 6242.32 | bwd_allreduce: 4.32 | step: 18.16
1204
+ {'loss': 1.6416, 'learning_rate': 2.1212121212121216e-06, 'epoch': 0.0}
1205
+ [2026-02-25 06:19:31,976] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.14 | optimizer_gradients: 1.07 | optimizer_step: 0.83
1206
+ [2026-02-25 06:19:31,978] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6311.47 | bwd_microstep: 6453.40 | bwd_inner_microstep: 5833.63 | bwd_allreduce_microstep: 619.70 | step_microstep: 18.15
1207
+ [2026-02-25 06:19:31,978] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6311.47 | bwd: 6453.41 | bwd_inner: 5833.62 | bwd_allreduce: 619.72 | step: 18.16
1208
+ {'loss': 1.5499, 'learning_rate': 2.4242424242424244e-06, 'epoch': 0.0}
1209
+ [2026-02-25 06:19:44,893] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.14 | optimizer_gradients: 1.15 | optimizer_step: 0.83
1210
+ [2026-02-25 06:19:44,894] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6273.18 | bwd_microstep: 6234.87 | bwd_inner_microstep: 6058.88 | bwd_allreduce_microstep: 175.92 | step_microstep: 21.21
1211
+ [2026-02-25 06:19:44,895] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6273.18 | bwd: 6234.87 | bwd_inner: 6058.87 | bwd_allreduce: 175.93 | step: 21.22
1212
+ {'loss': 1.4567, 'learning_rate': 2.7272727272727272e-06, 'epoch': 0.0}
1213
+ [2026-02-25 06:19:57,940] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.25 | optimizer_gradients: 1.12 | optimizer_step: 0.83
1214
+ [2026-02-25 06:19:57,941] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6458.85 | bwd_microstep: 6214.52 | bwd_inner_microstep: 6210.23 | bwd_allreduce_microstep: 4.20 | step_microstep: 19.43
1215
+ [2026-02-25 06:19:57,942] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6458.86 | bwd: 6214.52 | bwd_inner: 6210.23 | bwd_allreduce: 4.21 | step: 19.43
1216
+ {'loss': 1.3746, 'learning_rate': 3.0303030303030305e-06, 'epoch': 0.0}
1217
+ [2026-02-25 06:20:10,944] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.22 | optimizer_gradients: 1.08 | optimizer_step: 0.82
1218
+ [2026-02-25 06:20:10,946] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6353.43 | bwd_microstep: 6286.84 | bwd_inner_microstep: 6182.82 | bwd_allreduce_microstep: 103.94 | step_microstep: 19.10
1219
+ [2026-02-25 06:20:10,946] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6353.43 | bwd: 6286.84 | bwd_inner: 6182.81 | bwd_allreduce: 103.96 | step: 19.10
1220
+ {'loss': 1.2777, 'learning_rate': 3.3333333333333333e-06, 'epoch': 0.0}
1221
+ [2026-02-25 06:20:23,892] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.24 | optimizer_gradients: 1.05 | optimizer_step: 0.82
1222
+ [2026-02-25 06:20:23,893] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6333.80 | bwd_microstep: 6194.44 | bwd_inner_microstep: 6190.09 | bwd_allreduce_microstep: 4.27 | step_microstep: 19.16
1223
+ [2026-02-25 06:20:23,894] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6333.80 | bwd: 6194.45 | bwd_inner: 6190.08 | bwd_allreduce: 4.29 | step: 19.16
1224
+ {'loss': 1.3564, 'learning_rate': 3.636363636363636e-06, 'epoch': 0.0}
1225
+ [2026-02-25 06:20:36,837] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.35 | optimizer_gradients: 1.09 | optimizer_step: 0.81
1226
+ [2026-02-25 06:20:36,839] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6388.67 | bwd_microstep: 6187.08 | bwd_inner_microstep: 6157.39 | bwd_allreduce_microstep: 29.60 | step_microstep: 20.79
1227
+ [2026-02-25 06:20:36,839] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6388.68 | bwd: 6187.08 | bwd_inner: 6157.38 | bwd_allreduce: 29.62 | step: 20.79
1228
+ {'loss': 1.275, 'learning_rate': 3.939393939393939e-06, 'epoch': 0.0}
1229
+ [2026-02-25 06:20:49,744] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.37 | optimizer_gradients: 1.12 | optimizer_step: 0.75
1230
+ [2026-02-25 06:20:49,745] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6251.74 | bwd_microstep: 6165.91 | bwd_inner_microstep: 6117.25 | bwd_allreduce_microstep: 48.59 | step_microstep: 19.19
1231
+ [2026-02-25 06:20:49,745] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6251.75 | bwd: 6165.92 | bwd_inner: 6117.23 | bwd_allreduce: 48.61 | step: 19.19
1232
+ {'loss': 1.2457, 'learning_rate': 4.242424242424243e-06, 'epoch': 0.0}
1233
+ [2026-02-25 06:21:02,658] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.38 | optimizer_gradients: 1.08 | optimizer_step: 0.80
1234
+ [2026-02-25 06:21:02,659] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6326.26 | bwd_microstep: 6123.43 | bwd_inner_microstep: 6082.66 | bwd_allreduce_microstep: 40.68 | step_microstep: 19.10
1235
+ [2026-02-25 06:21:02,659] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6326.27 | bwd: 6123.42 | bwd_inner: 6082.65 | bwd_allreduce: 40.70 | step: 19.10
1236
+ {'loss': 1.2058, 'learning_rate': 4.5454545454545455e-06, 'epoch': 0.0}
1237
+ [2026-02-25 06:21:15,601] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.30 | optimizer_gradients: 1.12 | optimizer_step: 0.79
1238
+ [2026-02-25 06:21:15,602] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6394.92 | bwd_microstep: 6104.41 | bwd_inner_microstep: 6100.11 | bwd_allreduce_microstep: 4.21 | step_microstep: 18.76
1239
+ [2026-02-25 06:21:15,603] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6394.93 | bwd: 6104.40 | bwd_inner: 6100.10 | bwd_allreduce: 4.22 | step: 18.76
1240
+ {'loss': 1.1158, 'learning_rate': 4.848484848484849e-06, 'epoch': 0.0}
1241
+ [2026-02-25 06:21:28,722] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.22 | optimizer_gradients: 1.14 | optimizer_step: 0.84
1242
+ [2026-02-25 06:21:28,724] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6421.19 | bwd_microstep: 6232.89 | bwd_inner_microstep: 6228.60 | bwd_allreduce_microstep: 4.21 | step_microstep: 22.74
1243
+ [2026-02-25 06:21:28,724] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6421.21 | bwd: 6232.89 | bwd_inner: 6228.59 | bwd_allreduce: 4.22 | step: 22.82
1244
+ {'loss': 0.985, 'learning_rate': 5.151515151515152e-06, 'epoch': 0.0}
1245
+ [2026-02-25 06:21:41,721] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.23 | optimizer_gradients: 1.22 | optimizer_step: 0.87
1246
+ [2026-02-25 06:21:41,723] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6388.94 | bwd_microstep: 6156.86 | bwd_inner_microstep: 6152.26 | bwd_allreduce_microstep: 4.52 | step_microstep: 20.28
1247
+ [2026-02-25 06:21:41,723] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6388.94 | bwd: 6156.86 | bwd_inner: 6152.25 | bwd_allreduce: 4.54 | step: 20.28
1248
+ {'loss': 0.992, 'learning_rate': 5.4545454545454545e-06, 'epoch': 0.0}
1249
+ [2026-02-25 06:21:54,724] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.24 | optimizer_gradients: 1.14 | optimizer_step: 0.82
1250
+ [2026-02-25 06:21:54,725] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6390.08 | bwd_microstep: 6187.04 | bwd_inner_microstep: 6182.74 | bwd_allreduce_microstep: 4.21 | step_microstep: 18.93
1251
+ [2026-02-25 06:21:54,725] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6390.09 | bwd: 6187.04 | bwd_inner: 6182.73 | bwd_allreduce: 4.23 | step: 18.98
1252
+ {'loss': 0.8835, 'learning_rate': 5.7575757575757586e-06, 'epoch': 0.0}
1253
+ [2026-02-25 06:22:07,716] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.27 | optimizer_gradients: 1.15 | optimizer_step: 0.81
1254
+ [2026-02-25 06:22:07,718] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6356.84 | bwd_microstep: 6207.92 | bwd_inner_microstep: 6092.72 | bwd_allreduce_microstep: 115.11 | step_microstep: 18.93
1255
+ [2026-02-25 06:22:07,718] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6356.85 | bwd: 6207.92 | bwd_inner: 6092.70 | bwd_allreduce: 115.13 | step: 18.93
1256
+ {'loss': 0.992, 'learning_rate': 6.060606060606061e-06, 'epoch': 0.0}
1257
+ [2026-02-25 06:22:20,554] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | optimizer_allgather: 1.24 | optimizer_gradients: 1.15 | optimizer_step: 0.82
1258
+ [2026-02-25 06:22:20,556] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd_microstep: 6331.32 | bwd_microstep: 6050.91 | bwd_inner_microstep: 6011.84 | bwd_allreduce_microstep: 38.99 | step_microstep: 20.64
1259
+ [2026-02-25 06:22:20,556] [INFO] [logging.py:96:log_dist] [Rank 0] time (ms) | fwd: 6331.33 | bwd: 6050.91 | bwd_inner: 6011.83 | bwd_allreduce: 39.00 | step: 20.64
1260
+ {'loss': 0.939, 'learning_rate': 6.363636363636363e-06, 'epoch': 0.0}
1261
+ File "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py", line 764, in <module>
1262
+ main()
1263
+ File "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py", line 749, in main
1264
+ train_result = trainer.train(resume_from_checkpoint=checkpoint)
1265
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/transformers/trainer.py", line 1539, in train
1266
+ return inner_training_loop(
1267
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/transformers/trainer.py", line 1836, in _inner_training_loop
1268
+ for step, inputs in enumerate(epoch_iterator):
1269
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 701, in __next__
1270
+ data = self._next_data()
1271
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1445, in _next_data
1272
+ return self._process_data(data)
1273
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1491, in _process_data
1274
+ data.reraise()
1275
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/_utils.py", line 715, in reraise
1276
+ raise exception
1277
+ RuntimeError: Caught RuntimeError in pin memory thread for device 0.
1278
+ Original Traceback (most recent call last):
1279
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 41, in do_one_step
1280
+ data = pin_memory(data, device)
1281
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 75, in pin_memory
1282
+ {k: pin_memory(sample, device) for k, sample in data.items()}
1283
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 75, in <dictcomp>
1284
+ {k: pin_memory(sample, device) for k, sample in data.items()}
1285
+ File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 64, in pin_memory
1286
+ return data.pin_memory(device)
1287
+ RuntimeError: CUDA error: out of memory
1288
+ CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
1289
+ For debugging consider passing CUDA_LAUNCH_BLOCKING=1
1290
+ Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
1291
+
1292
+
1293
+ [rank0]: Traceback (most recent call last):
1294
+ [rank0]: File "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py", line 764, in <module>
1295
+ [rank0]: main()
1296
+ [rank0]: File "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py", line 749, in main
1297
+ [rank0]: train_result = trainer.train(resume_from_checkpoint=checkpoint)
1298
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/transformers/trainer.py", line 1539, in train
1299
+ [rank0]: return inner_training_loop(
1300
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/transformers/trainer.py", line 1836, in _inner_training_loop
1301
+ [rank0]: for step, inputs in enumerate(epoch_iterator):
1302
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 701, in __next__
1303
+ [rank0]: data = self._next_data()
1304
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1445, in _next_data
1305
+ [rank0]: return self._process_data(data)
1306
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1491, in _process_data
1307
+ [rank0]: data.reraise()
1308
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/_utils.py", line 715, in reraise
1309
+ [rank0]: raise exception
1310
+ [rank0]: RuntimeError: Caught RuntimeError in pin memory thread for device 0.
1311
+ [rank0]: Original Traceback (most recent call last):
1312
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 41, in do_one_step
1313
+ [rank0]: data = pin_memory(data, device)
1314
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 75, in pin_memory
1315
+ [rank0]: {k: pin_memory(sample, device) for k, sample in data.items()}
1316
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 75, in <dictcomp>
1317
+ [rank0]: {k: pin_memory(sample, device) for k, sample in data.items()}
1318
+ [rank0]: File "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 64, in pin_memory
1319
+ [rank0]: return data.pin_memory(device)
1320
+ [rank0]: RuntimeError: CUDA error: out of memory
1321
+ [rank0]: CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
1322
+ [rank0]: For debugging consider passing CUDA_LAUNCH_BLOCKING=1
1323
+ [rank0]: Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
wandb/run-20260225_055938-u2vegsv1/files/requirements.txt ADDED
@@ -0,0 +1,113 @@
1
+ nvidia-cuda-runtime-cu12==12.1.105
2
+ platformdirs==4.9.2
3
+ wandb==0.22.3
4
+ transformers==4.37.2
5
+ einops==0.8.2
6
+ accelerate==0.33.0
7
+ aiosignal==1.4.0
8
+ huggingface_hub==0.36.2
9
+ pycparser==2.22
10
+ multidict==6.7.1
11
+ urllib3==2.6.3
12
+ urllib3==2.5.0
13
+ aiohappyeyeballs==2.6.1
14
+ nvidia-cusolver-cu12==11.4.5.107
15
+ yarl==1.22.0
16
+ typing-inspection==0.4.2
17
+ gitdb==4.0.12
18
+ multiprocess==0.70.16
19
+ async-timeout==5.0.1
20
+ Jinja2==3.1.6
21
+ setuptools==82.0.0
22
+ torch==2.5.1+cu121
23
+ hyperframe==6.1.0
24
+ peft==0.10.0
25
+ tzdata==2025.3
26
+ dill==0.3.8
27
+ orjson==3.11.7
28
+ nvidia-cufft-cu12==11.0.2.54
29
+ propcache==0.4.1
30
+ PySocks==1.7.1
31
+ nvidia-cublas-cu12==12.1.3.1
32
+ nvidia-nccl-cu12==2.21.5
33
+ docker-pycreds==0.4.0
34
+ pydantic_core==2.41.5
35
+ nvidia-ml-py==13.590.48
36
+ ninja==1.13.0
37
+ cffi==1.15.0
38
+ nvidia-cuda-cupti-cu12==12.1.105
39
+ triton==3.1.0
40
+ annotated-types==0.7.0
41
+ nvidia-nvtx-cu12==12.1.105
42
+ timm==1.0.22
43
+ sympy==1.13.1
44
+ pydantic==2.12.5
45
+ xxhash==3.6.0
46
+ py-cpuinfo==9.0.0
47
+ sentry-sdk==2.53.0
48
+ networkx==3.4.2
49
+ click==8.3.1
50
+ regex==2026.1.15
51
+ pillow==12.0.0
52
+ zstandard==0.23.0
53
+ pyarrow==20.0.0
54
+ GitPython==3.1.46
55
+ wheel==0.46.3
56
+ fsspec==2025.3.0
57
+ typing_extensions==4.15.0
58
+ hf-xet==1.2.0
59
+ torchvision==0.20.1+cu121
60
+ python-dateutil==2.9.0.post0
61
+ numpy==1.26.4
62
+ smmap==5.0.2
63
+ setproctitle==1.2.3
64
+ safetensors==0.7.0
65
+ requests==2.32.5
66
+ hpack==4.1.0
67
+ six==1.17.0
68
+ modelscope==1.34.0
69
+ pip==26.0.1
70
+ h2==4.3.0
71
+ PyYAML==6.0.3
72
+ PyYAML==6.0
73
+ torchaudio==2.5.1+cu121
74
+ nvidia-nvjitlink-cu12==12.9.86
75
+ psutil==7.2.2
76
+ psutil==5.9.1
77
+ pandas==2.3.2
78
+ pytz==2025.2
79
+ nvidia-cudnn-cu12==9.1.0.70
80
+ packaging==25.0
81
+ hjson==3.1.0
82
+ deepspeed==0.14.4
83
+ datasets==4.0.0
84
+ nvidia-cuda-nvrtc-cu12==12.1.105
85
+ tokenizers==0.15.2
86
+ MarkupSafe==2.1.5
87
+ charset-normalizer==3.4.4
88
+ frozenlist==1.8.0
89
+ Brotli==1.0.9
90
+ certifi==2026.1.4
91
+ mpmath==1.3.0
92
+ protobuf==3.20.1
93
+ tqdm==4.67.3
94
+ nvidia-cusparse-cu12==12.1.0.106
95
+ attrs==25.4.0
96
+ appdirs==1.4.4
97
+ flash_attn==2.7.4.post1
98
+ aiohttp==3.13.3
99
+ filelock==3.20.0
100
+ idna==3.11
101
+ nvidia-curand-cu12==10.3.2.106
102
+ zipp==3.23.0
103
+ backports.tarfile==1.2.0
104
+ autocommand==2.2.2
105
+ platformdirs==4.4.0
106
+ jaraco.text==4.0.0
107
+ more-itertools==10.8.0
108
+ jaraco.context==6.1.0
109
+ jaraco.functools==4.4.0
110
+ wheel==0.46.3
111
+ tomli==2.4.0
112
+ packaging==26.0
113
+ importlib_metadata==8.7.1
wandb/run-20260225_055938-u2vegsv1/files/wandb-metadata.json ADDED
@@ -0,0 +1,144 @@
1
+ {
2
+ "os": "Linux-3.10.0-957.el7.x86_64-x86_64-with-glibc2.17",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2026-02-24T21:59:38.884470Z",
5
+ "args": [
6
+ "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/resolved_train_config.json"
7
+ ],
8
+ "program": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py",
9
+ "codePath": "InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py",
10
+ "codePathLocal": "internvl_cleaned/train/internvl_chat_finetune.py",
11
+ "git": {
12
+ "remote": "git@github.com:Yuxin916/CL_CoTNav.git",
13
+ "commit": "f7d6fbe6a8031d29a95c2f3be79e99f96670a12e"
14
+ },
15
+ "email": "caiy0039@e.ntu.edu.sg",
16
+ "root": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY",
17
+ "host": "SH-IDC1-10-140-37-45",
18
+ "executable": "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/bin/python3.10",
19
+ "cpu_count": 64,
20
+ "cpu_count_logical": 128,
21
+ "gpu": "NVIDIA A100-SXM4-80GB",
22
+ "gpu_count": 8,
23
+ "disk": {
24
+ "/": {
25
+ "total": "524945911808",
26
+ "used": "39460401152"
27
+ }
28
+ },
29
+ "memory": {
30
+ "total": "1081627828224"
31
+ },
32
+ "gpu_nvidia": [
33
+ {
34
+ "name": "NVIDIA A100-SXM4-80GB",
35
+ "memoryTotal": "85899345920",
36
+ "cudaCores": 6912,
37
+ "architecture": "Ampere",
38
+ "uuid": "GPU-9e78ad4b-b304-a199-38f4-24b3acd9b531"
39
+ },
40
+ {
41
+ "name": "NVIDIA A100-SXM4-80GB",
42
+ "memoryTotal": "85899345920",
43
+ "cudaCores": 6912,
44
+ "architecture": "Ampere",
45
+ "uuid": "GPU-19c9429b-5ecf-0b76-7d81-03c36a449f32"
46
+ },
47
+ {
48
+ "name": "NVIDIA A100-SXM4-80GB",
49
+ "memoryTotal": "85899345920",
50
+ "cudaCores": 6912,
51
+ "architecture": "Ampere",
52
+ "uuid": "GPU-57520d78-d04a-3028-bd40-5ebd78e123e6"
53
+ },
54
+ {
55
+ "name": "NVIDIA A100-SXM4-80GB",
56
+ "memoryTotal": "85899345920",
57
+ "cudaCores": 6912,
58
+ "architecture": "Ampere",
59
+ "uuid": "GPU-0d94270a-8ad3-b3a6-4acd-145fccc36d85"
60
+ },
61
+ {
62
+ "name": "NVIDIA A100-SXM4-80GB",
63
+ "memoryTotal": "85899345920",
64
+ "cudaCores": 6912,
65
+ "architecture": "Ampere",
66
+ "uuid": "GPU-b9b6fe80-e37a-b622-0deb-27ef46a965ff"
67
+ },
68
+ {
69
+ "name": "NVIDIA A100-SXM4-80GB",
70
+ "memoryTotal": "85899345920",
71
+ "cudaCores": 6912,
72
+ "architecture": "Ampere",
73
+ "uuid": "GPU-7b870038-dbe6-4039-b6bf-f90517f43f6c"
74
+ },
75
+ {
76
+ "name": "NVIDIA A100-SXM4-80GB",
77
+ "memoryTotal": "85899345920",
78
+ "cudaCores": 6912,
79
+ "architecture": "Ampere",
80
+ "uuid": "GPU-e3065d01-9539-64c9-417f-91273b521051"
81
+ },
82
+ {
83
+ "name": "NVIDIA A100-SXM4-80GB",
84
+ "memoryTotal": "85899345920",
85
+ "cudaCores": 6912,
86
+ "architecture": "Ampere",
87
+ "uuid": "GPU-28ce6eed-85b8-155a-5a5d-412dfb1dd1c0"
88
+ }
89
+ ],
90
+ "cudaVersion": "12.2",
91
+ "slurm": {
92
+ "cluster_name": "cluster_sproject3",
93
+ "conf": "/etc/slurm/slurm.conf",
94
+ "cpus_on_node": "128",
95
+ "cpus_per_task": "8",
96
+ "distribution": "cyclic",
97
+ "gtids": "0",
98
+ "job_account": "research",
99
+ "job_cpus_per_node": "128",
100
+ "job_cpus_per_node_pack_group_0": "128",
101
+ "job_gid": "200000139",
102
+ "job_gpus": "0,1,2,3,4,5,6,7",
103
+ "job_id": "7464467",
104
+ "job_name": "vlm_ft",
105
+ "job_nodelist": "SH-IDC1-10-140-37-45",
106
+ "job_num_nodes": "1",
107
+ "job_partition": "interntmp",
108
+ "job_qos": "normal",
109
+ "job_uid": "200000139",
110
+ "job_user": "wangmaonan",
111
+ "jobid": "7464467",
112
+ "launch_node_ipaddr": "10.140.37.45",
113
+ "localid": "0",
114
+ "mem_per_node": "49152",
115
+ "nnodes": "1",
116
+ "node_aliases": "(null)",
117
+ "nodeid": "0",
118
+ "nodelist": "SH-IDC1-10-140-37-45",
119
+ "nprocs": "1",
120
+ "ntasks": "1",
121
+ "ntasks_per_node": "1",
122
+ "prio_process": "0",
123
+ "procid": "0",
124
+ "srun_comm_host": "10.140.37.45",
125
+ "srun_comm_port": "44377",
126
+ "step_gpus": "0,1,2,3",
127
+ "step_id": "0",
128
+ "step_launcher_port": "44377",
129
+ "step_nodelist": "SH-IDC1-10-140-37-45",
130
+ "step_num_nodes": "1",
131
+ "step_num_tasks": "1",
132
+ "step_tasks_per_node": "1",
133
+ "stepid": "0",
134
+ "submit_dir": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav",
135
+ "submit_host": "SH-IDC1-10-140-37-45",
136
+ "task_pid": "151799",
137
+ "tasks_per_node": "1",
138
+ "topology_addr": "SH-IDC1-10-140-37-45",
139
+ "topology_addr_pattern": "node",
140
+ "umask": "0002",
141
+ "working_cluster": "cluster_sproject3:SH-IDC1-10-140-37-161:6817:9216:109"
142
+ },
143
+ "writerId": "lcgegjlerikh15wodksbpfheadlvwekl"
144
+ }
wandb/run-20260225_055938-u2vegsv1/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/epoch":0,"train/global_step":21,"_wandb":{"runtime":1360},"_runtime":1360,"_timestamp":1.7719717405855756e+09,"_step":20,"train/loss":0.939,"train/learning_rate":6.363636363636363e-06}
wandb/run-20260225_055938-u2vegsv1/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
1
+ {"time":"2026-02-25T05:59:39.04618224+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp546zrv3n/port-154283.txt","pid":154283,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-02-25T05:59:39.04849989+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":154283}
3
+ {"time":"2026-02-25T05:59:39.048638993+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-154283-157255-155222642/socket","Net":"unix"}}
4
+ {"time":"2026-02-25T05:59:39.129695518+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-02-25T05:59:39.140549387+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"u2vegsv1","id":"1(@)"}
6
+ {"time":"2026-02-25T05:59:39.556207953+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"u2vegsv1","id":"1(@)"}
7
+ {"time":"2026-02-25T06:22:20.609960124+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2026-02-25T06:22:20.611818219+08:00","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2026-02-25T06:22:20.611755671+08:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2026-02-25T06:22:20.612915442+08:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-154283-157255-155222642/socket","Net":"unix"}}
11
+ {"time":"2026-02-25T06:22:20.613364891+08:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
12
+ {"time":"2026-02-25T06:22:22.357517871+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2026-02-25T06:22:22.35822517+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2026-02-25T06:22:22.358768645+08:00","level":"INFO","msg":"server is closed"}
wandb/run-20260225_055938-u2vegsv1/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
1
+ {"time":"2026-02-25T05:59:39.14150883+08:00","level":"INFO","msg":"stream: starting","core version":"0.22.3"}
2
+ {"time":"2026-02-25T05:59:39.552925091+08:00","level":"INFO","msg":"stream: created new stream","id":"u2vegsv1"}
3
+ {"time":"2026-02-25T05:59:39.553478946+08:00","level":"INFO","msg":"handler: started","stream_id":"u2vegsv1"}
4
+ {"time":"2026-02-25T05:59:39.555784131+08:00","level":"INFO","msg":"stream: started","id":"u2vegsv1"}
5
+ {"time":"2026-02-25T05:59:39.555788317+08:00","level":"INFO","msg":"writer: started","stream_id":"u2vegsv1"}
6
+ {"time":"2026-02-25T05:59:39.555793631+08:00","level":"INFO","msg":"sender: started","stream_id":"u2vegsv1"}
7
+ {"time":"2026-02-25T06:22:20.612437616+08:00","level":"INFO","msg":"stream: closing","id":"u2vegsv1"}
8
+ {"time":"2026-02-25T06:22:21.743357445+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2026-02-25T06:22:22.353551338+08:00","level":"INFO","msg":"handler: closed","stream_id":"u2vegsv1"}
10
+ {"time":"2026-02-25T06:22:22.355293739+08:00","level":"INFO","msg":"sender: closed","stream_id":"u2vegsv1"}
11
+ {"time":"2026-02-25T06:22:22.355734583+08:00","level":"INFO","msg":"stream: closed","id":"u2vegsv1"}
wandb/run-20260225_055938-u2vegsv1/logs/debug.log ADDED
@@ -0,0 +1,24 @@
1
+ 2026-02-25 05:59:38,900 INFO MainThread:154283 [wandb_setup.py:_flush():81] Current SDK version is 0.22.3
2
+ 2026-02-25 05:59:38,902 INFO MainThread:154283 [wandb_setup.py:_flush():81] Configure stats pid to 154283
3
+ 2026-02-25 05:59:38,903 INFO MainThread:154283 [wandb_setup.py:_flush():81] Loading settings from /mnt/petrelfs/wangmaonan/.config/wandb/settings
4
+ 2026-02-25 05:59:38,903 INFO MainThread:154283 [wandb_setup.py:_flush():81] Loading settings from /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/wandb/settings
5
+ 2026-02-25 05:59:38,904 INFO MainThread:154283 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2026-02-25 05:59:38,904 INFO MainThread:154283 [wandb_init.py:setup_run_log_directory():706] Logging user logs to /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/wandb/run-20260225_055938-u2vegsv1/logs/debug.log
7
+ 2026-02-25 05:59:38,905 INFO MainThread:154283 [wandb_init.py:setup_run_log_directory():707] Logging internal logs to /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/wandb/run-20260225_055938-u2vegsv1/logs/debug-internal.log
8
+ 2026-02-25 05:59:38,905 INFO MainThread:154283 [wandb_init.py:init():833] calling init triggers
9
+ 2026-02-25 05:59:38,906 INFO MainThread:154283 [wandb_init.py:init():838] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2026-02-25 05:59:38,906 INFO MainThread:154283 [wandb_init.py:init():881] starting backend
12
+ 2026-02-25 05:59:39,129 INFO MainThread:154283 [wandb_init.py:init():884] sending inform_init request
13
+ 2026-02-25 05:59:39,136 INFO MainThread:154283 [wandb_init.py:init():892] backend started and connected
14
+ 2026-02-25 05:59:39,138 INFO MainThread:154283 [wandb_init.py:init():962] updated telemetry
15
+ 2026-02-25 05:59:39,163 INFO MainThread:154283 [wandb_init.py:init():986] communicating run to backend with 90.0 second timeout
16
+ 2026-02-25 05:59:40,117 INFO MainThread:154283 [wandb_init.py:init():1033] starting run threads in backend
17
+ 2026-02-25 05:59:40,400 INFO MainThread:154283 [wandb_run.py:_console_start():2506] atexit reg
18
+ 2026-02-25 05:59:40,400 INFO MainThread:154283 [wandb_run.py:_redirect():2354] redirect: wrap_raw
19
+ 2026-02-25 05:59:40,401 INFO MainThread:154283 [wandb_run.py:_redirect():2423] Wrapping output streams.
20
+ 2026-02-25 05:59:40,401 INFO MainThread:154283 [wandb_run.py:_redirect():2446] Redirects installed.
21
+ 2026-02-25 05:59:40,407 INFO MainThread:154283 [wandb_init.py:init():1073] run started, returning control to user process
22
+ 2026-02-25 05:59:40,410 INFO MainThread:154283 [wandb_run.py:_config_callback():1390] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['InternVLChatModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '../pretrained/InternVL3-2B', '_commit_hash': None, '_attn_implementation_internal': None, 'transformers_version': None, 'auto_map': {'AutoConfig': 'configuration_internvl_chat.InternVLChatConfig', 'AutoModel': 'modeling_internvl_chat.InternVLChatModel', 'AutoModelForCausalLM': 'modeling_internvl_chat.InternVLChatModel'}, 'hidden_size': 1536, 'image_fold': None, 'model_type': 'internvl_chat', 'system_message': 'You are an autonomous navigation agent operating in indoor environments. You receive spatial information through position embeddings injected into visual features and text tokens. Use the BEV map, position embeddings, and semantic information to make navigation decisions. When the target object is detected (<target> marker), navigate directly to it. 
Otherwise, explore frontiers strategically to find the goal object.', 'vision_config': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': True, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['InternVisionModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'OpenGVLab/InternViT-6B-448px-V1-5', 'transformers_version': '4.37.2', '_attn_implementation_autoset': True, 'auto_map': {'AutoConfig': 'configuration_intern_vit.InternVisionConfig', 'AutoModel': 'modeling_intern_vit.InternVisionModel'}, 'capacity_factor': 1.2, 'eval_capacity_factor': 1.4, 'laux_allreduce': 'all_nodes', 'model_type': 'intern_vit_6b', 'moe_coeff_ratio': 0.5, 'moe_intermediate_size': 768, 'moe_output_scale': 4.0, 'noisy_gate_policy': 'RSample_before', 'num_experts': 8, 'num_routed_experts': 4, 'num_shared_experts': 4, 'shared_expert_intermediate_size': 3072, 'use_moe': False, 'use_residual': True, 'use_rts': False, 'use_weighted_residual': False, 'hidden_size': 1024, 'intermediate_size': 4096, 'dropout': 0.0, 'drop_path_rate': 0.0, 'num_hidden_layers': 24, 'num_attention_heads': 16, 'num_channels': 3, 'patch_size': 14, 'image_size': 448, 'initializer_range': 1e-10, 'initializer_factor': 0.1, 'attention_dropout': 0.0, 'layer_norm_eps': 1e-06, 'hidden_act': 'gelu', 'norm_type': 'layer_norm', 'qkv_bias': True, 'qk_normalization': False, 'use_flash_attn': True}, 'llm_config': {'vocab_size': 151677, 'max_position_embeddings': 32768, 'hidden_size': 1536, 'intermediate_size': 8960, 'num_hidden_layers': 28, 'num_attention_heads': 12, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 70, 'num_key_value_heads': 2, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'rope_theta': 1000000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': True, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 
'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 151643, 'pad_token_id': None, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './pretrained/Qwen2.5-32B-Instruct', 'transformers_version': '4.37.2', '_attn_implementation_autoset': True, 'model_type': 'qwen2', 'moe_config': None, 'rope_scaling': {'factor': 2.0, 'rope_type': 'dynamic', 'type': 'dynamic'}, 'attn_implementation': 'flash_attention_2'}, 'use_backbone_lora': 0, 'use_llm_lora': 64, 'pad2square': False, 'select_layer': -1, 'force_image_size': 448, 'downsample_ratio': 0.5, 'template': 'internvl2_5_nav', 'dynamic_image_size': False, 'use_thumbnail': True, 'ps_version': 'v2', 'min_dynamic_patch': 1, 'max_dynamic_patch': 12, 'num_image_token_bev': 256, 'num_image_token_ego': 32, 'use_pairwise_spatial_encoder': False, 'use_position_embeddings': True, 'dual_text_pos_injection': True, 'bev_image_size': 448, 'vit_bev_freeze': True, 'vit_bev_use_lora': True, 'vit_bev_lora_rank': 64, 'vit_rgb_freeze': True, 'vit_rgb_use_lora': True, 'vit_rgb_lora_rank': 16, 'output_dir': '/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': 11000, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'runs/Feb25_05-58-51_SH-IDC1-10-140-37-45', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.5, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 12, 'past_index': -1, 'run_name': 
'a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY_steps11000_gpus4_acc1', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': 'zero_stage2_config_acc1.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+ 2026-02-25 06:22:20,606 INFO wandb-AsyncioManager-main:154283 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-02-25 06:22:20,606 INFO wandb-AsyncioManager-main:154283 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
wandb/run-20260225_055938-u2vegsv1/run-u2vegsv1.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:465e04b8972492c2e154f9a5c50c1934ae6a535cad3a90ec8b25e3fa67d05e5c
+ size 435093
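The three lines above are a Git LFS pointer, not the run file itself: the repository stores only the spec version, a SHA-256 content address, and the blob size in bytes, while the 435093-byte `.wandb` blob lives in LFS storage. A minimal sketch of reading such a pointer, illustrative only and not part of this repository's code:

```python
# Illustrative sketch (not part of this repo): parse a Git LFS pointer file
# into its three fields. Works for the pointer lines shown in this commit.
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    fields["size"] = int(fields["size"])  # blob size in bytes
    return fields

# e.g. parse_lfs_pointer(open("run-u2vegsv1.wandb").read())["size"] -> 435093
```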
wandb/run-20260225_063717-0ub00jhc/files/output.log ADDED
@@ -0,0 +1,398 @@
+ 0%| | 0/9300 [00:00<?, ?it/s]
+ 02/25/2026 06:53:48 - INFO - internvl_cleaned.model.internvl_chat.modeling_internvl_chat - [Text Position Embedding Verification] Sample 1:
+ <s> at token 437 sub_idx=0
+ <s> at token 1921 sub_idx=1
+ <s> at token 3160 sub_idx=2
+ <s> at token 4545 sub_idx=3
+ <s> at token 5493 sub_idx=4
+ <s> at token 6587 sub_idx=5
+ <s> at token 7484 sub_idx=6
+ <s> at token 8141 sub_idx=7
+ <s> at token 8945 sub_idx=8
+ <s> at token 9891 sub_idx=9
+ <s> at token 10835 sub_idx=10
+ <s> at token 12123 sub_idx=11
+ <s> at token 13020 sub_idx=12
+ <s> at token 13821 sub_idx=13
+ <s> at token 14575 sub_idx=14
+ <s> at token 15767 sub_idx=15
+ <s> at token 16617 sub_idx=16
+ <s> at token 17905 sub_idx=17
+ <s> at token 19243 sub_idx=18
+ <s> at token 20482 sub_idx=19
+ <s> at token 21331 sub_idx=20
+ <s> at token 22180 sub_idx=21
+ <s> at token 23030 sub_idx=22
+ <s> at token 23973 sub_idx=23
+ <e_s> at token 439 sub_idx=0
+ <e_s> at token 1923 sub_idx=1
+ <e_s> at token 3162 sub_idx=2
+ <e_s> at token 4547 sub_idx=3
+ <e_s> at token 5495 sub_idx=4
+ <e_s> at token 6589 sub_idx=5
+ <e_s> at token 7486 sub_idx=6
+ <e_s> at token 8143 sub_idx=7
+ <e_s> at token 8947 sub_idx=8
+ <e_s> at token 9893 sub_idx=9
+ <e_s> at token 10837 sub_idx=10
+ <e_s> at token 12125 sub_idx=11
+ <e_s> at token 13022 sub_idx=12
+ <e_s> at token 13823 sub_idx=13
+ <e_s> at token 14577 sub_idx=14
+ <e_s> at token 15769 sub_idx=15
+ <e_s> at token 16619 sub_idx=16
+ <e_s> at token 17907 sub_idx=17
+ <e_s> at token 19245 sub_idx=18
+ <e_s> at token 20484 sub_idx=19
+ <e_s> at token 21333 sub_idx=20
+ <e_s> at token 22182 sub_idx=21
+ <e_s> at token 23032 sub_idx=22
+ <e_s> at token 23975 sub_idx=23
+ <cand> 0 at token 444 sub_idx=0 pixel=[320.0,270.0]
+ <cand> 1 at token 492 sub_idx=0 pixel=[185.0,213.0]
+ <cand> 2 at token 540 sub_idx=0 pixel=[319.0,247.0]
+ <cand> 3 at token 588 sub_idx=0 pixel=[256.0,210.0]
+ <cand> 4 at token 636 sub_idx=0 pixel=[212.0,138.0]
+ <cand> 5 at token 684 sub_idx=0 pixel=[245.0,121.0]
+ <cand> 6 at token 732 sub_idx=0 pixel=[272.0,370.0]
+ <cand> 7 at token 780 sub_idx=0 pixel=[307.0,160.0]
+ <cand> 8 at token 828 sub_idx=0 pixel=[242.0,314.0]
+ <cand> 9 at token 876 sub_idx=0 pixel=[181.0,243.0]
+ <cand> 10 at token 924 sub_idx=0 pixel=[235.0,183.0]
+ <cand> 11 at token 973 sub_idx=0 pixel=[275.0,174.0]
+ <cand> 12 at token 1022 sub_idx=0 pixel=[240.0,152.0]
+ <cand> 13 at token 1071 sub_idx=0 pixel=[216.0,214.0]
+ <cand> 14 at token 1118 sub_idx=0 pixel=[315.0,117.0]
+ <cand> 15 at token 1167 sub_idx=0 pixel=[226.0,111.0]
+ <cand> 16 at token 1216 sub_idx=0 pixel=[189.0,277.0]
+ <cand> 17 at token 1265 sub_idx=0 pixel=[278.0,161.0]
+ <cand> 18 at token 1314 sub_idx=0 pixel=[203.0,286.0]
+ <cand> 19 at token 1363 sub_idx=0 pixel=[275.0,194.0]
+ <cand> 20 at token 1412 sub_idx=0 pixel=[404.0,400.0]
+ <cand> 0 at token 1928 sub_idx=1 pixel=[141.0,126.0]
+ <cand> 1 at token 1976 sub_idx=1 pixel=[109.0,55.0]
+ <cand> 2 at token 2024 sub_idx=1 pixel=[143.0,246.0]
+ <cand> 3 at token 2072 sub_idx=1 pixel=[224.0,269.0]
+ <cand> 4 at token 2120 sub_idx=1 pixel=[145.0,219.0]
+ <cand> 5 at token 2168 sub_idx=1 pixel=[175.0,188.0]
+ <cand> 6 at token 2216 sub_idx=1 pixel=[45.0,88.0]
+ <cand> 7 at token 2264 sub_idx=1 pixel=[57.0,178.0]
+ <cand> 8 at token 2312 sub_idx=1 pixel=[18.0,16.0]
+ <cand> 9 at token 2360 sub_idx=1 pixel=[97.0,76.0]
+ <cand> 10 at token 2408 sub_idx=1 pixel=[24.0,147.0]
+ <cand> 11 at token 2457 sub_idx=1 pixel=[161.0,107.0]
+ <cand> 12 at token 2506 sub_idx=1 pixel=[233.0,297.0]
+ <cand> 13 at token 2553 sub_idx=1 pixel=[100.0,222.0]
+ <cand> 14 at token 2602 sub_idx=1 pixel=[31.0,226.0]
+ <cand> 15 at token 2651 sub_idx=1 pixel=[224.0,255.0]
+ <cand> 0 at token 3167 sub_idx=2 pixel=[155.0,57.0]
+ <cand> 1 at token 3215 sub_idx=2 pixel=[6.0,64.0]
+ <cand> 2 at token 3263 sub_idx=2 pixel=[172.0,64.0]
+ <cand> 3 at token 3311 sub_idx=2 pixel=[174.0,148.0]
+ <cand> 4 at token 3359 sub_idx=2 pixel=[20.0,108.0]
+ <cand> 5 at token 3407 sub_idx=2 pixel=[86.0,136.0]
+ <cand> 6 at token 3455 sub_idx=2 pixel=[84.0,33.0]
+ <cand> 7 at token 3503 sub_idx=2 pixel=[91.0,69.0]
+ <cand> 8 at token 3551 sub_idx=2 pixel=[219.0,232.0]
+ <cand> 9 at token 3597 sub_idx=2 pixel=[28.0,85.0]
+ <cand> 10 at token 3645 sub_idx=2 pixel=[195.0,236.0]
+ <cand> 11 at token 3694 sub_idx=2 pixel=[29.0,44.0]
+ <cand> 12 at token 3743 sub_idx=2 pixel=[157.0,76.0]
+ <cand> 13 at token 3792 sub_idx=2 pixel=[38.0,129.0]
+ <cand> 14 at token 3841 sub_idx=2 pixel=[86.0,208.0]
+ <cand> 15 at token 3890 sub_idx=2 pixel=[168.0,211.0]
+ <cand> 16 at token 3939 sub_idx=2 pixel=[49.0,206.0]
+ <cand> 17 at token 3988 sub_idx=2 pixel=[149.0,230.0]
+ <cand> 18 at token 4037 sub_idx=2 pixel=[58.0,102.0]
+ <cand> 0 at token 4552 sub_idx=3 pixel=[21.0,189.0]
+ <cand> 1 at token 4600 sub_idx=3 pixel=[220.0,170.0]
+ <cand> 2 at token 4648 sub_idx=3 pixel=[5.0,225.0]
+ <cand> 3 at token 4696 sub_idx=3 pixel=[193.0,118.0]
+ <cand> 4 at token 4744 sub_idx=3 pixel=[180.0,151.0]
+ <cand> 5 at token 4792 sub_idx=3 pixel=[72.0,83.0]
+ <cand> 6 at token 4840 sub_idx=3 pixel=[157.0,221.0]
+ <cand> 7 at token 4888 sub_idx=3 pixel=[159.0,82.0]
+ <cand> 8 at token 4936 sub_idx=3 pixel=[99.0,179.0]
+ <cand> 9 at token 4984 sub_idx=3 pixel=[213.0,234.0]
+ <cand> 0 at token 5500 sub_idx=4 pixel=[278.0,132.0]
+ <cand> 1 at token 5548 sub_idx=4 pixel=[272.0,71.0]
+ <cand> 2 at token 5596 sub_idx=4 pixel=[278.0,266.0]
+ <cand> 3 at token 5644 sub_idx=4 pixel=[222.0,135.0]
+ <cand> 4 at token 5692 sub_idx=4 pixel=[208.0,113.0]
+ <cand> 5 at token 5740 sub_idx=4 pixel=[260.0,237.0]
+ <cand> 6 at token 5788 sub_idx=4 pixel=[175.0,258.0]
+ <cand> 7 at token 5836 sub_idx=4 pixel=[229.0,282.0]
+ <cand> 8 at token 5884 sub_idx=4 pixel=[291.0,200.0]
+ <cand> 9 at token 5932 sub_idx=4 pixel=[235.0,65.0]
+ <cand> 10 at token 5980 sub_idx=4 pixel=[269.0,287.0]
+ <cand> 11 at token 6029 sub_idx=4 pixel=[207.0,86.0]
+ <cand> 12 at token 6078 sub_idx=4 pixel=[183.0,242.0]
+ <cand> 0 at token 6594 sub_idx=5 pixel=[312.0,203.0]
+ <cand> 1 at token 6642 sub_idx=5 pixel=[203.0,237.0]
+ <cand> 2 at token 6690 sub_idx=5 pixel=[230.0,143.0]
+ <cand> 3 at token 6738 sub_idx=5 pixel=[342.0,209.0]
+ <cand> 4 at token 6786 sub_idx=5 pixel=[247.0,178.0]
+ <cand> 5 at token 6834 sub_idx=5 pixel=[216.0,115.0]
+ <cand> 6 at token 6882 sub_idx=5 pixel=[308.0,249.0]
+ <cand> 7 at token 6930 sub_idx=5 pixel=[263.0,258.0]
+ <cand> 8 at token 6978 sub_idx=5 pixel=[218.0,197.0]
+ <cand> 0 at token 7491 sub_idx=6 pixel=[173.0,163.0]
+ <cand> 1 at token 7539 sub_idx=6 pixel=[253.0,129.0]
+ <cand> 2 at token 7587 sub_idx=6 pixel=[192.0,203.0]
+ <cand> 3 at token 7635 sub_idx=6 pixel=[193.0,115.0]
+ <cand> 0 at token 8148 sub_idx=7 pixel=[269.0,204.0]
+ <cand> 1 at token 8196 sub_idx=7 pixel=[161.0,221.0]
+ <cand> 2 at token 8244 sub_idx=7 pixel=[193.0,296.0]
+ <cand> 3 at token 8292 sub_idx=7 pixel=[159.0,251.0]
+ <cand> 4 at token 8340 sub_idx=7 pixel=[139.0,222.0]
+ <cand> 5 at token 8388 sub_idx=7 pixel=[271.0,231.0]
+ <cand> 6 at token 8436 sub_idx=7 pixel=[242.0,235.0]
+ <cand> 0 at token 8952 sub_idx=8 pixel=[234.0,193.0]
+ <cand> 1 at token 9000 sub_idx=8 pixel=[211.0,399.0]
+ <cand> 2 at token 9048 sub_idx=8 pixel=[204.0,379.0]
+ <cand> 3 at token 9096 sub_idx=8 pixel=[217.0,126.0]
+ <cand> 4 at token 9144 sub_idx=8 pixel=[197.0,206.0]
+ <cand> 5 at token 9192 sub_idx=8 pixel=[272.0,344.0]
+ <cand> 6 at token 9240 sub_idx=8 pixel=[241.0,177.0]
+ <cand> 7 at token 9288 sub_idx=8 pixel=[303.0,416.0]
+ <cand> 8 at token 9336 sub_idx=8 pixel=[194.0,316.0]
+ <cand> 9 at token 9384 sub_idx=8 pixel=[269.0,443.0]
+ <cand> 0 at token 9898 sub_idx=9 pixel=[248.0,271.0]
+ <cand> 1 at token 9946 sub_idx=9 pixel=[183.0,251.0]
+ <cand> 2 at token 9994 sub_idx=9 pixel=[243.0,244.0]
+ <cand> 3 at token 10042 sub_idx=9 pixel=[251.0,312.0]
+ <cand> 4 at token 10090 sub_idx=9 pixel=[230.0,214.0]
+ <cand> 5 at token 10136 sub_idx=9 pixel=[107.0,373.0]
+ <cand> 6 at token 10184 sub_idx=9 pixel=[110.0,350.0]
+ <cand> 7 at token 10232 sub_idx=9 pixel=[324.0,326.0]
+ <cand> 8 at token 10280 sub_idx=9 pixel=[286.0,375.0]
+ <cand> 9 at token 10328 sub_idx=9 pixel=[331.0,346.0]
+ <cand> 0 at token 10842 sub_idx=10 pixel=[317.0,192.0]
+ <cand> 1 at token 10890 sub_idx=10 pixel=[110.0,244.0]
+ <cand> 2 at token 10938 sub_idx=10 pixel=[246.0,181.0]
+ <cand> 3 at token 10986 sub_idx=10 pixel=[209.0,249.0]
+ <cand> 4 at token 11034 sub_idx=10 pixel=[230.0,268.0]
+ <cand> 5 at token 11082 sub_idx=10 pixel=[144.0,222.0]
+ <cand> 6 at token 11130 sub_idx=10 pixel=[276.0,212.0]
+ <cand> 7 at token 11178 sub_idx=10 pixel=[138.0,293.0]
+ <cand> 8 at token 11226 sub_idx=10 pixel=[189.0,298.0]
+ <cand> 9 at token 11274 sub_idx=10 pixel=[150.0,237.0]
+ <cand> 10 at token 11322 sub_idx=10 pixel=[88.0,340.0]
+ <cand> 11 at token 11371 sub_idx=10 pixel=[49.0,330.0]
+ <cand> 12 at token 11420 sub_idx=10 pixel=[128.0,273.0]
+ <cand> 13 at token 11469 sub_idx=10 pixel=[244.0,222.0]
+ <cand> 14 at token 11516 sub_idx=10 pixel=[22.0,204.0]
+ <cand> 15 at token 11565 sub_idx=10 pixel=[223.0,311.0]
+ <cand> 16 at token 11614 sub_idx=10 pixel=[295.0,180.0]
+ <cand> 0 at token 12130 sub_idx=11 pixel=[255.0,239.0]
+ <cand> 1 at token 12178 sub_idx=11 pixel=[129.0,321.0]
+ <cand> 2 at token 12226 sub_idx=11 pixel=[299.0,209.0]
+ <cand> 3 at token 12274 sub_idx=11 pixel=[255.0,210.0]
+ <cand> 4 at token 12322 sub_idx=11 pixel=[109.0,327.0]
+ <cand> 5 at token 12370 sub_idx=11 pixel=[239.0,296.0]
+ <cand> 6 at token 12418 sub_idx=11 pixel=[136.0,291.0]
+ <cand> 7 at token 12466 sub_idx=11 pixel=[211.0,319.0]
+ <cand> 8 at token 12514 sub_idx=11 pixel=[171.0,313.0]
+ <cand> 0 at token 13027 sub_idx=12 pixel=[261.0,348.0]
+ <cand> 1 at token 13075 sub_idx=12 pixel=[321.0,357.0]
+ <cand> 2 at token 13123 sub_idx=12 pixel=[171.0,274.0]
+ <cand> 3 at token 13171 sub_idx=12 pixel=[225.0,182.0]
+ <cand> 4 at token 13219 sub_idx=12 pixel=[270.0,115.0]
+ <cand> 5 at token 13267 sub_idx=12 pixel=[328.0,265.0]
+ <cand> 6 at token 13315 sub_idx=12 pixel=[224.0,161.0]
+ <cand> 0 at token 13828 sub_idx=13 pixel=[221.0,187.0]
+ <cand> 1 at token 13876 sub_idx=13 pixel=[189.0,204.0]
+ <cand> 2 at token 13924 sub_idx=13 pixel=[220.0,160.0]
+ <cand> 3 at token 13972 sub_idx=13 pixel=[194.0,236.0]
+ <cand> 4 at token 14020 sub_idx=13 pixel=[150.0,247.0]
+ <cand> 5 at token 14068 sub_idx=13 pixel=[210.0,197.0]
+ <cand> 0 at token 14582 sub_idx=14 pixel=[219.0,191.0]
+ <cand> 1 at token 14630 sub_idx=14 pixel=[150.0,237.0]
+ <cand> 2 at token 14678 sub_idx=14 pixel=[266.0,213.0]
+ <cand> 3 at token 14726 sub_idx=14 pixel=[196.0,129.0]
+ <cand> 4 at token 14774 sub_idx=14 pixel=[213.0,347.0]
+ <cand> 5 at token 14822 sub_idx=14 pixel=[173.0,265.0]
+ <cand> 6 at token 14870 sub_idx=14 pixel=[215.0,274.0]
+ <cand> 7 at token 14918 sub_idx=14 pixel=[165.0,210.0]
+ <cand> 8 at token 14966 sub_idx=14 pixel=[264.0,279.0]
+ <cand> 9 at token 15014 sub_idx=14 pixel=[155.0,191.0]
+ <cand> 10 at token 15062 sub_idx=14 pixel=[373.0,380.0]
+ <cand> 11 at token 15111 sub_idx=14 pixel=[136.0,265.0]
+ <cand> 12 at token 15160 sub_idx=14 pixel=[223.0,259.0]
+ <cand> 13 at token 15209 sub_idx=14 pixel=[292.0,249.0]
+ <cand> 14 at token 15258 sub_idx=14 pixel=[357.0,387.0]
+ <cand> 0 at token 15774 sub_idx=15 pixel=[249.0,86.0]
+ <cand> 1 at token 15822 sub_idx=15 pixel=[173.0,218.0]
+ <cand> 2 at token 15870 sub_idx=15 pixel=[280.0,249.0]
+ <cand> 3 at token 15918 sub_idx=15 pixel=[288.0,211.0]
+ <cand> 4 at token 15966 sub_idx=15 pixel=[228.0,82.0]
+ <cand> 5 at token 16014 sub_idx=15 pixel=[310.0,161.0]
+ <cand> 6 at token 16062 sub_idx=15 pixel=[178.0,234.0]
+ <cand> 7 at token 16110 sub_idx=15 pixel=[232.0,51.0]
+ <cand> 0 at token 16624 sub_idx=16 pixel=[104.0,112.0]
+ <cand> 1 at token 16672 sub_idx=16 pixel=[182.0,47.0]
+ <cand> 2 at token 16720 sub_idx=16 pixel=[189.0,245.0]
+ <cand> 3 at token 16768 sub_idx=16 pixel=[245.0,57.0]
+ <cand> 4 at token 16816 sub_idx=16 pixel=[325.0,61.0]
+ <cand> 5 at token 16864 sub_idx=16 pixel=[108.0,230.0]
+ <cand> 6 at token 16912 sub_idx=16 pixel=[98.0,101.0]
+ <cand> 7 at token 16960 sub_idx=16 pixel=[365.0,224.0]
+ <cand> 8 at token 17008 sub_idx=16 pixel=[223.0,175.0]
+ <cand> 9 at token 17056 sub_idx=16 pixel=[260.0,311.0]
+ <cand> 10 at token 17104 sub_idx=16 pixel=[216.0,251.0]
+ <cand> 11 at token 17151 sub_idx=16 pixel=[284.0,246.0]
+ <cand> 12 at token 17200 sub_idx=16 pixel=[206.0,195.0]
+ <cand> 13 at token 17249 sub_idx=16 pixel=[339.0,258.0]
+ <cand> 14 at token 17298 sub_idx=16 pixel=[302.0,249.0]
+ <cand> 15 at token 17347 sub_idx=16 pixel=[316.0,177.0]
+ <cand> 16 at token 17396 sub_idx=16 pixel=[271.0,154.0]
+ <cand> 0 at token 17912 sub_idx=17 pixel=[281.0,280.0]
+ <cand> 1 at token 17960 sub_idx=17 pixel=[250.0,245.0]
+ <cand> 2 at token 18008 sub_idx=17 pixel=[197.0,321.0]
+ <cand> 3 at token 18056 sub_idx=17 pixel=[188.0,271.0]
+ <cand> 4 at token 18104 sub_idx=17 pixel=[383.0,250.0]
+ <cand> 5 at token 18152 sub_idx=17 pixel=[363.0,189.0]
+ <cand> 6 at token 18200 sub_idx=17 pixel=[206.0,180.0]
+ <cand> 7 at token 18248 sub_idx=17 pixel=[305.0,327.0]
+ <cand> 8 at token 18296 sub_idx=17 pixel=[244.0,157.0]
+ <cand> 9 at token 18344 sub_idx=17 pixel=[206.0,128.0]
+ <cand> 10 at token 18392 sub_idx=17 pixel=[365.0,172.0]
+ <cand> 11 at token 18441 sub_idx=17 pixel=[174.0,295.0]
+ <cand> 12 at token 18490 sub_idx=17 pixel=[355.0,309.0]
+ <cand> 13 at token 18539 sub_idx=17 pixel=[240.0,235.0]
+ <cand> 14 at token 18586 sub_idx=17 pixel=[153.0,326.0]
+ <cand> 15 at token 18635 sub_idx=17 pixel=[238.0,325.0]
+ <cand> 16 at token 18684 sub_idx=17 pixel=[364.0,327.0]
+ <cand> 17 at token 18733 sub_idx=17 pixel=[179.0,248.0]
+ <cand> 0 at token 19250 sub_idx=18 pixel=[196.0,286.0]
+ <cand> 1 at token 19298 sub_idx=18 pixel=[186.0,308.0]
+ <cand> 2 at token 19346 sub_idx=18 pixel=[354.0,318.0]
+ <cand> 3 at token 19394 sub_idx=18 pixel=[196.0,186.0]
+ <cand> 4 at token 19442 sub_idx=18 pixel=[212.0,193.0]
+ <cand> 5 at token 19490 sub_idx=18 pixel=[205.0,259.0]
+ <cand> 6 at token 19538 sub_idx=18 pixel=[321.0,249.0]
+ <cand> 7 at token 19586 sub_idx=18 pixel=[236.0,238.0]
+ <cand> 8 at token 19634 sub_idx=18 pixel=[323.0,267.0]
+ <cand> 9 at token 19682 sub_idx=18 pixel=[152.0,188.0]
+ <cand> 10 at token 19730 sub_idx=18 pixel=[305.0,190.0]
+ <cand> 11 at token 19779 sub_idx=18 pixel=[237.0,268.0]
+ <cand> 12 at token 19826 sub_idx=18 pixel=[143.0,203.0]
+ <cand> 13 at token 19875 sub_idx=18 pixel=[226.0,280.0]
+ <cand> 14 at token 19924 sub_idx=18 pixel=[237.0,320.0]
+ <cand> 15 at token 19973 sub_idx=18 pixel=[188.0,183.0]
+ <cand> 0 at token 20489 sub_idx=19 pixel=[197.0,269.0]
+ <cand> 1 at token 20537 sub_idx=19 pixel=[227.0,291.0]
+ <cand> 2 at token 20585 sub_idx=19 pixel=[209.0,204.0]
+ <cand> 3 at token 20633 sub_idx=19 pixel=[230.0,264.0]
+ <cand> 4 at token 20681 sub_idx=19 pixel=[247.0,174.0]
+ <cand> 5 at token 20729 sub_idx=19 pixel=[246.0,194.0]
+ <cand> 6 at token 20777 sub_idx=19 pixel=[264.0,226.0]
+ <cand> 7 at token 20825 sub_idx=19 pixel=[260.0,217.0]
+ <cand> 0 at token 21338 sub_idx=20 pixel=[237.0,243.0]
+ <cand> 1 at token 21384 sub_idx=20 pixel=[277.0,77.0]
+ <cand> 2 at token 21432 sub_idx=20 pixel=[310.0,237.0]
+ <cand> 3 at token 21480 sub_idx=20 pixel=[328.0,289.0]
+ <cand> 4 at token 21528 sub_idx=20 pixel=[245.0,101.0]
+ <cand> 5 at token 21576 sub_idx=20 pixel=[241.0,159.0]
+ <cand> 6 at token 21624 sub_idx=20 pixel=[183.0,200.0]
+ <cand> 7 at token 21672 sub_idx=20 pixel=[229.0,270.0]
+ <cand> 0 at token 22187 sub_idx=21 pixel=[134.0,124.0]
+ <cand> 1 at token 22235 sub_idx=21 pixel=[195.0,137.0]
+ <cand> 2 at token 22283 sub_idx=21 pixel=[65.0,112.0]
+ <cand> 3 at token 22331 sub_idx=21 pixel=[255.0,264.0]
+ <cand> 4 at token 22379 sub_idx=21 pixel=[207.0,281.0]
+ <cand> 5 at token 22427 sub_idx=21 pixel=[97.0,122.0]
+ <cand> 6 at token 22475 sub_idx=21 pixel=[241.0,182.0]
+ <cand> 7 at token 22523 sub_idx=21 pixel=[226.0,183.0]
+ <cand> 0 at token 23037 sub_idx=22 pixel=[242.0,288.0]
+ <cand> 1 at token 23085 sub_idx=22 pixel=[113.0,419.0]
+ <cand> 2 at token 23133 sub_idx=22 pixel=[218.0,220.0]
+ <cand> 3 at token 23179 sub_idx=22 pixel=[142.0,145.0]
+ <cand> 4 at token 23227 sub_idx=22 pixel=[218.0,368.0]
+ <cand> 5 at token 23275 sub_idx=22 pixel=[208.0,266.0]
+ <cand> 6 at token 23323 sub_idx=22 pixel=[198.0,377.0]
+ <cand> 7 at token 23371 sub_idx=22 pixel=[122.0,372.0]
+ <cand> 8 at token 23419 sub_idx=22 pixel=[226.0,278.0]
+ <cand> 9 at token 23467 sub_idx=22 pixel=[239.0,278.0]
+ <cand> 0 at token 23980 sub_idx=23 pixel=[197.0,39.0]
+ <cand> 1 at token 24028 sub_idx=23 pixel=[290.0,274.0]
+ <cand> 2 at token 24076 sub_idx=23 pixel=[228.0,337.0]
+ <cand> 3 at token 24124 sub_idx=23 pixel=[259.0,296.0]
+ <cand> 4 at token 24172 sub_idx=23 pixel=[267.0,247.0]
+ <cand> 5 at token 24220 sub_idx=23 pixel=[185.0,32.0]
+ <cand> 6 at token 24268 sub_idx=23 pixel=[318.0,213.0]
+ <cand> 7 at token 24316 sub_idx=23 pixel=[316.0,38.0]
+ <cand> 8 at token 24364 sub_idx=23 pixel=[267.0,316.0]
+ <cand> 9 at token 24412 sub_idx=23 pixel=[209.0,218.0]
+ <e_cand> 0 at token 490 sub_idx=0 (same embed)
+ <e_cand> 1 at token 538 sub_idx=0 (same embed)
+ <e_cand> 2 at token 586 sub_idx=0 (same embed)
+ <e_cand> 0 at token 1974 sub_idx=1 (same embed)
+ <e_cand> 1 at token 2022 sub_idx=1 (same embed)
+ <e_cand> 2 at token 2070 sub_idx=1 (same embed)
+ <e_cand> 0 at token 3213 sub_idx=2 (same embed)
+ <e_cand> 1 at token 3261 sub_idx=2 (same embed)
+ <e_cand> 2 at token 3309 sub_idx=2 (same embed)
+ <e_cand> 0 at token 4598 sub_idx=3 (same embed)
+ <e_cand> 1 at token 4646 sub_idx=3 (same embed)
+ <e_cand> 2 at token 4694 sub_idx=3 (same embed)
+ <e_cand> 0 at token 5546 sub_idx=4 (same embed)
+ <e_cand> 1 at token 5594 sub_idx=4 (same embed)
+ <e_cand> 2 at token 5642 sub_idx=4 (same embed)
+ <e_cand> 0 at token 6640 sub_idx=5 (same embed)
+ <e_cand> 1 at token 6688 sub_idx=5 (same embed)
+ <e_cand> 2 at token 6736 sub_idx=5 (same embed)
+ <e_cand> 0 at token 7537 sub_idx=6 (same embed)
+ <e_cand> 1 at token 7585 sub_idx=6 (same embed)
+ <e_cand> 2 at token 7633 sub_idx=6 (same embed)
+ <e_cand> 0 at token 8194 sub_idx=7 (same embed)
+ <e_cand> 1 at token 8242 sub_idx=7 (same embed)
+ <e_cand> 2 at token 8290 sub_idx=7 (same embed)
+ <e_cand> 0 at token 8998 sub_idx=8 (same embed)
+ <e_cand> 1 at token 9046 sub_idx=8 (same embed)
+ <e_cand> 2 at token 9094 sub_idx=8 (same embed)
+ <e_cand> 0 at token 9944 sub_idx=9 (same embed)
+ <e_cand> 1 at token 9992 sub_idx=9 (same embed)
+ <e_cand> 2 at token 10040 sub_idx=9 (same embed)
+ <e_cand> 0 at token 10888 sub_idx=10 (same embed)
+ <e_cand> 1 at token 10936 sub_idx=10 (same embed)
+ <e_cand> 2 at token 10984 sub_idx=10 (same embed)
+ <e_cand> 0 at token 12176 sub_idx=11 (same embed)
+ <e_cand> 1 at token 12224 sub_idx=11 (same embed)
+ <e_cand> 2 at token 12272 sub_idx=11 (same embed)
+ <e_cand> 0 at token 13073 sub_idx=12 (same embed)
+ <e_cand> 1 at token 13121 sub_idx=12 (same embed)
+ <e_cand> 2 at token 13169 sub_idx=12 (same embed)
+ <e_cand> 0 at token 13874 sub_idx=13 (same embed)
+ <e_cand> 1 at token 13922 sub_idx=13 (same embed)
+ <e_cand> 2 at token 13970 sub_idx=13 (same embed)
+ <e_cand> 0 at token 14628 sub_idx=14 (same embed)
+ <e_cand> 1 at token 14676 sub_idx=14 (same embed)
+ <e_cand> 2 at token 14724 sub_idx=14 (same embed)
+ <e_cand> 0 at token 15820 sub_idx=15 (same embed)
+ <e_cand> 1 at token 15868 sub_idx=15 (same embed)
+ <e_cand> 2 at token 15916 sub_idx=15 (same embed)
+ <e_cand> 0 at token 16670 sub_idx=16 (same embed)
+ <e_cand> 1 at token 16718 sub_idx=16 (same embed)
+ <e_cand> 2 at token 16766 sub_idx=16 (same embed)
+ <e_cand> 0 at token 17958 sub_idx=17 (same embed)
+ <e_cand> 1 at token 18006 sub_idx=17 (same embed)
+ <e_cand> 2 at token 18054 sub_idx=17 (same embed)
+ <e_cand> 0 at token 19296 sub_idx=18 (same embed)
+ <e_cand> 1 at token 19344 sub_idx=18 (same embed)
+ <e_cand> 2 at token 19392 sub_idx=18 (same embed)
+ <e_cand> 0 at token 20535 sub_idx=19 (same embed)
+ <e_cand> 1 at token 20583 sub_idx=19 (same embed)
+ <e_cand> 2 at token 20631 sub_idx=19 (same embed)
+ <e_cand> 0 at token 21382 sub_idx=20 (same embed)
+ <e_cand> 1 at token 21430 sub_idx=20 (same embed)
+ <e_cand> 2 at token 21478 sub_idx=20 (same embed)
+ <e_cand> 0 at token 22233 sub_idx=21 (same embed)
+ <e_cand> 1 at token 22281 sub_idx=21 (same embed)
+ <e_cand> 2 at token 22329 sub_idx=21 (same embed)
+ <e_cand> 0 at token 23083 sub_idx=22 (same embed)
+ <e_cand> 1 at token 23131 sub_idx=22 (same embed)
+ <e_cand> 2 at token 23177 sub_idx=22 (same embed)
+ <e_cand> 0 at token 24026 sub_idx=23 (same embed)
+ <e_cand> 1 at token 24074 sub_idx=23 (same embed)
+ <e_cand> 2 at token 24122 sub_idx=23 (same embed)
wandb/run-20260225_063717-0ub00jhc/files/requirements.txt ADDED
@@ -0,0 +1,113 @@
+ nvidia-cuda-runtime-cu12==12.1.105
+ platformdirs==4.9.2
+ wandb==0.22.3
+ transformers==4.37.2
+ einops==0.8.2
+ accelerate==0.33.0
+ aiosignal==1.4.0
+ huggingface_hub==0.36.2
+ pycparser==2.22
+ multidict==6.7.1
+ urllib3==2.6.3
+ urllib3==2.5.0
+ aiohappyeyeballs==2.6.1
+ nvidia-cusolver-cu12==11.4.5.107
+ yarl==1.22.0
+ typing-inspection==0.4.2
+ gitdb==4.0.12
+ multiprocess==0.70.16
+ async-timeout==5.0.1
+ Jinja2==3.1.6
+ setuptools==82.0.0
+ torch==2.5.1+cu121
+ hyperframe==6.1.0
+ peft==0.10.0
+ tzdata==2025.3
+ dill==0.3.8
+ orjson==3.11.7
+ nvidia-cufft-cu12==11.0.2.54
+ propcache==0.4.1
+ PySocks==1.7.1
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-nccl-cu12==2.21.5
+ docker-pycreds==0.4.0
+ pydantic_core==2.41.5
+ nvidia-ml-py==13.590.48
+ ninja==1.13.0
+ cffi==1.15.0
+ nvidia-cuda-cupti-cu12==12.1.105
+ triton==3.1.0
+ annotated-types==0.7.0
+ nvidia-nvtx-cu12==12.1.105
+ timm==1.0.22
+ sympy==1.13.1
+ pydantic==2.12.5
+ xxhash==3.6.0
+ py-cpuinfo==9.0.0
+ sentry-sdk==2.53.0
+ networkx==3.4.2
+ click==8.3.1
+ regex==2026.1.15
+ pillow==12.0.0
+ zstandard==0.23.0
+ pyarrow==20.0.0
+ GitPython==3.1.46
+ wheel==0.46.3
+ fsspec==2025.3.0
+ typing_extensions==4.15.0
+ hf-xet==1.2.0
+ torchvision==0.20.1+cu121
+ python-dateutil==2.9.0.post0
+ numpy==1.26.4
+ smmap==5.0.2
+ setproctitle==1.2.3
+ safetensors==0.7.0
+ requests==2.32.5
+ hpack==4.1.0
+ six==1.17.0
+ modelscope==1.34.0
+ pip==26.0.1
+ h2==4.3.0
+ PyYAML==6.0.3
+ PyYAML==6.0
+ torchaudio==2.5.1+cu121
+ nvidia-nvjitlink-cu12==12.9.86
+ psutil==7.2.2
+ psutil==5.9.1
+ pandas==2.3.2
+ pytz==2025.2
+ nvidia-cudnn-cu12==9.1.0.70
+ packaging==25.0
+ hjson==3.1.0
+ deepspeed==0.14.4
+ datasets==4.0.0
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ tokenizers==0.15.2
+ MarkupSafe==2.1.5
+ charset-normalizer==3.4.4
+ frozenlist==1.8.0
+ Brotli==1.0.9
+ certifi==2026.1.4
+ mpmath==1.3.0
+ protobuf==3.20.1
+ tqdm==4.67.3
+ nvidia-cusparse-cu12==12.1.0.106
+ attrs==25.4.0
+ appdirs==1.4.4
+ flash_attn==2.7.4.post1
+ aiohttp==3.13.3
+ filelock==3.20.0
+ idna==3.11
+ nvidia-curand-cu12==10.3.2.106
+ zipp==3.23.0
+ backports.tarfile==1.2.0
+ autocommand==2.2.2
+ platformdirs==4.4.0
+ jaraco.text==4.0.0
+ more-itertools==10.8.0
+ jaraco.context==6.1.0
+ jaraco.functools==4.4.0
+ wheel==0.46.3
+ tomli==2.4.0
+ packaging==26.0
+ importlib_metadata==8.7.1
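Note that this frozen list pins several packages twice with conflicting versions (urllib3 2.6.3/2.5.0, PyYAML 6.0.3/6.0, psutil 7.2.2/5.9.1, platformdirs 4.9.2/4.4.0, packaging 25.0/26.0, and wheel listed twice), which usually means two site-packages locations were on the path when the environment was captured. An illustrative sketch for flagging such duplicates:

```python
# Illustrative sketch: flag packages pinned more than once in a frozen
# requirements list (e.g. urllib3, PyYAML, psutil, platformdirs above).
def duplicate_pins(requirements_text: str) -> dict[str, list[str]]:
    pins: dict[str, list[str]] = {}
    for line in requirements_text.splitlines():
        line = line.strip()
        if "==" in line:
            name, version = line.split("==", 1)
            pins.setdefault(name.lower(), []).append(version)
    return {name: vs for name, vs in pins.items() if len(vs) > 1}
```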
wandb/run-20260225_063717-0ub00jhc/files/wandb-metadata.json ADDED
@@ -0,0 +1,144 @@
+ {
+ "os": "Linux-3.10.0-957.el7.x86_64-x86_64-with-glibc2.17",
+ "python": "CPython 3.10.18",
+ "startedAt": "2026-02-24T22:37:17.678092Z",
+ "args": [
+ "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/resolved_train_config.json"
+ ],
+ "program": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py",
+ "codePath": "InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py",
+ "codePathLocal": "internvl_cleaned/train/internvl_chat_finetune.py",
+ "git": {
+ "remote": "git@github.com:Yuxin916/CL_CoTNav.git",
+ "commit": "f7d6fbe6a8031d29a95c2f3be79e99f96670a12e"
+ },
+ "email": "caiy0039@e.ntu.edu.sg",
+ "root": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY",
+ "host": "SH-IDC1-10-140-37-45",
+ "executable": "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/bin/python3.10",
+ "cpu_count": 64,
+ "cpu_count_logical": 128,
+ "gpu": "NVIDIA A100-SXM4-80GB",
+ "gpu_count": 8,
+ "disk": {
+ "/": {
+ "total": "524945911808",
+ "used": "39460663296"
+ }
+ },
+ "memory": {
+ "total": "1081627828224"
+ },
+ "gpu_nvidia": [
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-9e78ad4b-b304-a199-38f4-24b3acd9b531"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-19c9429b-5ecf-0b76-7d81-03c36a449f32"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-57520d78-d04a-3028-bd40-5ebd78e123e6"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-0d94270a-8ad3-b3a6-4acd-145fccc36d85"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-b9b6fe80-e37a-b622-0deb-27ef46a965ff"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-7b870038-dbe6-4039-b6bf-f90517f43f6c"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-e3065d01-9539-64c9-417f-91273b521051"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-28ce6eed-85b8-155a-5a5d-412dfb1dd1c0"
+ }
+ ],
+ "cudaVersion": "12.2",
+ "slurm": {
+ "cluster_name": "cluster_sproject3",
+ "conf": "/etc/slurm/slurm.conf",
+ "cpus_on_node": "128",
+ "cpus_per_task": "8",
+ "distribution": "cyclic",
+ "gtids": "0",
+ "job_account": "research",
+ "job_cpus_per_node": "128",
+ "job_cpus_per_node_pack_group_0": "128",
+ "job_gid": "200000139",
+ "job_gpus": "0,1,2,3,4,5,6,7",
+ "job_id": "7464696",
+ "job_name": "vlm_ft",
+ "job_nodelist": "SH-IDC1-10-140-37-45",
+ "job_num_nodes": "1",
+ "job_partition": "interntmp",
+ "job_qos": "normal",
+ "job_uid": "200000139",
+ "job_user": "wangmaonan",
+ "jobid": "7464696",
+ "launch_node_ipaddr": "10.140.37.45",
+ "localid": "0",
+ "mem_per_node": "49152",
+ "nnodes": "1",
+ "node_aliases": "(null)",
+ "nodeid": "0",
+ "nodelist": "SH-IDC1-10-140-37-45",
+ "nprocs": "1",
+ "ntasks": "1",
+ "ntasks_per_node": "1",
+ "prio_process": "0",
+ "procid": "0",
+ "srun_comm_host": "10.140.37.45",
+ "srun_comm_port": "40413",
+ "step_gpus": "0,1,2,3",
+ "step_id": "0",
+ "step_launcher_port": "40413",
+ "step_nodelist": "SH-IDC1-10-140-37-45",
+ "step_num_nodes": "1",
+ "step_num_tasks": "1",
+ "step_tasks_per_node": "1",
+ "stepid": "0",
+ "submit_dir": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav",
+ "submit_host": "SH-IDC1-10-140-37-45",
+ "task_pid": "236139",
+ "tasks_per_node": "1",
+ "topology_addr": "SH-IDC1-10-140-37-45",
+ "topology_addr_pattern": "node",
+ "umask": "0002",
+ "working_cluster": "cluster_sproject3:SH-IDC1-10-140-37-161:6817:9216:109"
+ },
+ "writerId": "1xd6l9g7kzysn4yq7wybuhrbyoibnbxf"
+ }
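The metadata above records the full hardware inventory (note the byte counts are strings, not numbers), and `slurm.step_gpus` pins the step to 4 of the 8 visible GPUs, consistent with the `gpus4` suffix in the run name. A small illustrative sketch that summarizes it; field names match the file, the path is an assumption:

```python
import json

# Illustrative sketch: summarize the hardware recorded in wandb-metadata.json.
with open("wandb-metadata.json") as f:  # path is an assumption
    meta = json.load(f)

gpus = meta["gpu_nvidia"]
total_gib = sum(int(g["memoryTotal"]) for g in gpus) / 2**30
print(f"{len(gpus)}x {meta['gpu']}, {total_gib:.0f} GiB of GPU memory")
# -> 8x NVIDIA A100-SXM4-80GB, 640 GiB of GPU memory
```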
wandb/run-20260225_063717-0ub00jhc/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
+ {"time":"2026-02-25T06:37:17.85489302+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmps47xgacm/port-236721.txt","pid":236721,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+ {"time":"2026-02-25T06:37:17.857743824+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":236721}
+ {"time":"2026-02-25T06:37:17.857993952+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-236721-239969-3041544036/socket","Net":"unix"}}
+ {"time":"2026-02-25T06:37:17.925755794+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+ {"time":"2026-02-25T06:37:17.936203006+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"0ub00jhc","id":"1(@)"}
+ {"time":"2026-02-25T06:37:18.342592105+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"0ub00jhc","id":"1(@)"}
+ {"time":"2026-02-25T06:54:14.629396898+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
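`debug-core.log` and `debug-internal.log` are JSON Lines files: one self-contained JSON record per line, each with `time`, `level`, and `msg` fields. An illustrative reader (the path is an assumption):

```python
import json

# Illustrative sketch: stream the JSON Lines records of a wandb core log.
def read_jsonl(path: str):
    with open(path) as f:
        for line in f:
            if line.strip():
                yield json.loads(line)

for rec in read_jsonl("logs/debug-core.log"):  # path is an assumption
    print(rec["time"], rec["level"], rec["msg"])
```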
wandb/run-20260225_063717-0ub00jhc/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
+ {"time":"2026-02-25T06:37:17.937232022+08:00","level":"INFO","msg":"stream: starting","core version":"0.22.3"}
+ {"time":"2026-02-25T06:37:18.338623764+08:00","level":"INFO","msg":"stream: created new stream","id":"0ub00jhc"}
+ {"time":"2026-02-25T06:37:18.339228707+08:00","level":"INFO","msg":"handler: started","stream_id":"0ub00jhc"}
+ {"time":"2026-02-25T06:37:18.342133656+08:00","level":"INFO","msg":"stream: started","id":"0ub00jhc"}
+ {"time":"2026-02-25T06:37:18.34215159+08:00","level":"INFO","msg":"sender: started","stream_id":"0ub00jhc"}
+ {"time":"2026-02-25T06:37:18.342148878+08:00","level":"INFO","msg":"writer: started","stream_id":"0ub00jhc"}
wandb/run-20260225_063717-0ub00jhc/logs/debug.log ADDED
@@ -0,0 +1,22 @@
+ 2026-02-25 06:37:17,697 INFO MainThread:236721 [wandb_setup.py:_flush():81] Current SDK version is 0.22.3
+ 2026-02-25 06:37:17,699 INFO MainThread:236721 [wandb_setup.py:_flush():81] Configure stats pid to 236721
+ 2026-02-25 06:37:17,699 INFO MainThread:236721 [wandb_setup.py:_flush():81] Loading settings from /mnt/petrelfs/wangmaonan/.config/wandb/settings
+ 2026-02-25 06:37:17,700 INFO MainThread:236721 [wandb_setup.py:_flush():81] Loading settings from /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/wandb/settings
+ 2026-02-25 06:37:17,700 INFO MainThread:236721 [wandb_setup.py:_flush():81] Loading settings from environment variables
+ 2026-02-25 06:37:17,701 INFO MainThread:236721 [wandb_init.py:setup_run_log_directory():706] Logging user logs to /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/wandb/run-20260225_063717-0ub00jhc/logs/debug.log
+ 2026-02-25 06:37:17,701 INFO MainThread:236721 [wandb_init.py:setup_run_log_directory():707] Logging internal logs to /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/wandb/run-20260225_063717-0ub00jhc/logs/debug-internal.log
+ 2026-02-25 06:37:17,702 INFO MainThread:236721 [wandb_init.py:init():833] calling init triggers
+ 2026-02-25 06:37:17,702 INFO MainThread:236721 [wandb_init.py:init():838] wandb.init called with sweep_config: {}
+ config: {'_wandb': {}}
+ 2026-02-25 06:37:17,703 INFO MainThread:236721 [wandb_init.py:init():881] starting backend
+ 2026-02-25 06:37:17,925 INFO MainThread:236721 [wandb_init.py:init():884] sending inform_init request
+ 2026-02-25 06:37:17,932 INFO MainThread:236721 [wandb_init.py:init():892] backend started and connected
+ 2026-02-25 06:37:17,933 INFO MainThread:236721 [wandb_init.py:init():962] updated telemetry
+ 2026-02-25 06:37:17,963 INFO MainThread:236721 [wandb_init.py:init():986] communicating run to backend with 90.0 second timeout
+ 2026-02-25 06:37:19,017 INFO MainThread:236721 [wandb_init.py:init():1033] starting run threads in backend
+ 2026-02-25 06:37:19,332 INFO MainThread:236721 [wandb_run.py:_console_start():2506] atexit reg
+ 2026-02-25 06:37:19,333 INFO MainThread:236721 [wandb_run.py:_redirect():2354] redirect: wrap_raw
+ 2026-02-25 06:37:19,333 INFO MainThread:236721 [wandb_run.py:_redirect():2423] Wrapping output streams.
+ 2026-02-25 06:37:19,333 INFO MainThread:236721 [wandb_run.py:_redirect():2446] Redirects installed.
+ 2026-02-25 06:37:19,340 INFO MainThread:236721 [wandb_init.py:init():1073] run started, returning control to user process
+ 2026-02-25 06:37:19,343 INFO MainThread:236721 [wandb_run.py:_config_callback():1390] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['InternVLChatModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '../pretrained/InternVL3-2B', '_commit_hash': None, '_attn_implementation_internal': None, 'transformers_version': None, 'auto_map': {'AutoConfig': 'configuration_internvl_chat.InternVLChatConfig', 'AutoModel': 'modeling_internvl_chat.InternVLChatModel', 'AutoModelForCausalLM': 'modeling_internvl_chat.InternVLChatModel'}, 'hidden_size': 1536, 'image_fold': None, 'model_type': 'internvl_chat', 'system_message': 'You are an autonomous navigation agent operating in indoor environments. You receive spatial information through position embeddings injected into visual features and text tokens. Use the BEV map, position embeddings, and semantic information to make navigation decisions. When the target object is detected (<target> marker), navigate directly to it. 
Otherwise, explore frontiers strategically to find the goal object.', 'vision_config': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': True, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['InternVisionModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'OpenGVLab/InternViT-6B-448px-V1-5', 'transformers_version': '4.37.2', '_attn_implementation_autoset': True, 'auto_map': {'AutoConfig': 'configuration_intern_vit.InternVisionConfig', 'AutoModel': 'modeling_intern_vit.InternVisionModel'}, 'capacity_factor': 1.2, 'eval_capacity_factor': 1.4, 'laux_allreduce': 'all_nodes', 'model_type': 'intern_vit_6b', 'moe_coeff_ratio': 0.5, 'moe_intermediate_size': 768, 'moe_output_scale': 4.0, 'noisy_gate_policy': 'RSample_before', 'num_experts': 8, 'num_routed_experts': 4, 'num_shared_experts': 4, 'shared_expert_intermediate_size': 3072, 'use_moe': False, 'use_residual': True, 'use_rts': False, 'use_weighted_residual': False, 'hidden_size': 1024, 'intermediate_size': 4096, 'dropout': 0.0, 'drop_path_rate': 0.0, 'num_hidden_layers': 24, 'num_attention_heads': 16, 'num_channels': 3, 'patch_size': 14, 'image_size': 448, 'initializer_range': 1e-10, 'initializer_factor': 0.1, 'attention_dropout': 0.0, 'layer_norm_eps': 1e-06, 'hidden_act': 'gelu', 'norm_type': 'layer_norm', 'qkv_bias': True, 'qk_normalization': False, 'use_flash_attn': True}, 'llm_config': {'vocab_size': 151677, 'max_position_embeddings': 32768, 'hidden_size': 1536, 'intermediate_size': 8960, 'num_hidden_layers': 28, 'num_attention_heads': 12, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 70, 'num_key_value_heads': 2, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'rope_theta': 1000000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': True, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 
'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 151643, 'pad_token_id': None, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './pretrained/Qwen2.5-32B-Instruct', 'transformers_version': '4.37.2', '_attn_implementation_autoset': True, 'model_type': 'qwen2', 'moe_config': None, 'rope_scaling': {'factor': 2.0, 'rope_type': 'dynamic', 'type': 'dynamic'}, 'attn_implementation': 'flash_attention_2'}, 'use_backbone_lora': 0, 'use_llm_lora': 64, 'pad2square': False, 'select_layer': -1, 'force_image_size': 448, 'downsample_ratio': 0.5, 'template': 'internvl2_5_nav', 'dynamic_image_size': False, 'use_thumbnail': True, 'ps_version': 'v2', 'min_dynamic_patch': 1, 'max_dynamic_patch': 12, 'num_image_token_bev': 256, 'num_image_token_ego': 32, 'use_pairwise_spatial_encoder': False, 'use_position_embeddings': True, 'dual_text_pos_injection': True, 'bev_image_size': 448, 'vit_bev_freeze': True, 'vit_bev_use_lora': True, 'vit_bev_lora_rank': 64, 'vit_rgb_freeze': True, 'vit_rgb_use_lora': True, 'vit_rgb_lora_rank': 16, 'output_dir': '/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': 9300, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'runs/Feb25_06-36-26_SH-IDC1-10-140-37-45', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.5, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 12, 'past_index': -1, 'run_name': 
'a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY_steps9300_gpus4_acc1', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': 'zero_stage2_config_acc1.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20260225_063717-0ub00jhc/run-0ub00jhc.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e269023c47e74756d9b57ae41c89068739f24c06f6ea070aa7b3e56a496b14df
+ size 262144
wandb/run-20260225_094307-hmhb8ltr/files/output.log ADDED
The diff for this file is too large to render. See raw diff
wandb/run-20260225_094307-hmhb8ltr/files/requirements.txt ADDED
@@ -0,0 +1,113 @@
+ nvidia-cuda-runtime-cu12==12.1.105
+ platformdirs==4.9.2
+ wandb==0.22.3
+ transformers==4.37.2
+ einops==0.8.2
+ accelerate==0.33.0
+ aiosignal==1.4.0
+ huggingface_hub==0.36.2
+ pycparser==2.22
+ multidict==6.7.1
+ urllib3==2.6.3
+ urllib3==2.5.0
+ aiohappyeyeballs==2.6.1
+ nvidia-cusolver-cu12==11.4.5.107
+ yarl==1.22.0
+ typing-inspection==0.4.2
+ gitdb==4.0.12
+ multiprocess==0.70.16
+ async-timeout==5.0.1
+ Jinja2==3.1.6
+ setuptools==82.0.0
+ torch==2.5.1+cu121
+ hyperframe==6.1.0
+ peft==0.10.0
+ tzdata==2025.3
+ dill==0.3.8
+ orjson==3.11.7
+ nvidia-cufft-cu12==11.0.2.54
+ propcache==0.4.1
+ PySocks==1.7.1
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-nccl-cu12==2.21.5
+ docker-pycreds==0.4.0
+ pydantic_core==2.41.5
+ nvidia-ml-py==13.590.48
+ ninja==1.13.0
+ cffi==1.15.0
+ nvidia-cuda-cupti-cu12==12.1.105
+ triton==3.1.0
+ annotated-types==0.7.0
+ nvidia-nvtx-cu12==12.1.105
+ timm==1.0.22
+ sympy==1.13.1
+ pydantic==2.12.5
+ xxhash==3.6.0
+ py-cpuinfo==9.0.0
+ sentry-sdk==2.53.0
+ networkx==3.4.2
+ click==8.3.1
+ regex==2026.1.15
+ pillow==12.0.0
+ zstandard==0.23.0
+ pyarrow==20.0.0
+ GitPython==3.1.46
+ wheel==0.46.3
+ fsspec==2025.3.0
+ typing_extensions==4.15.0
+ hf-xet==1.2.0
+ torchvision==0.20.1+cu121
+ python-dateutil==2.9.0.post0
+ numpy==1.26.4
+ smmap==5.0.2
+ setproctitle==1.2.3
+ safetensors==0.7.0
+ requests==2.32.5
+ hpack==4.1.0
+ six==1.17.0
+ modelscope==1.34.0
+ pip==26.0.1
+ h2==4.3.0
+ PyYAML==6.0.3
+ PyYAML==6.0
+ torchaudio==2.5.1+cu121
+ nvidia-nvjitlink-cu12==12.9.86
+ psutil==7.2.2
+ psutil==5.9.1
+ pandas==2.3.2
+ pytz==2025.2
+ nvidia-cudnn-cu12==9.1.0.70
+ packaging==25.0
+ hjson==3.1.0
+ deepspeed==0.14.4
+ datasets==4.0.0
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ tokenizers==0.15.2
+ MarkupSafe==2.1.5
+ charset-normalizer==3.4.4
+ frozenlist==1.8.0
+ Brotli==1.0.9
+ certifi==2026.1.4
+ mpmath==1.3.0
+ protobuf==3.20.1
+ tqdm==4.67.3
+ nvidia-cusparse-cu12==12.1.0.106
+ attrs==25.4.0
+ appdirs==1.4.4
+ flash_attn==2.7.4.post1
+ aiohttp==3.13.3
+ filelock==3.20.0
+ idna==3.11
+ nvidia-curand-cu12==10.3.2.106
+ zipp==3.23.0
+ backports.tarfile==1.2.0
+ autocommand==2.2.2
+ platformdirs==4.4.0
+ jaraco.text==4.0.0
+ more-itertools==10.8.0
+ jaraco.context==6.1.0
+ jaraco.functools==4.4.0
+ wheel==0.46.3
+ tomli==2.4.0
+ packaging==26.0
+ importlib_metadata==8.7.1
wandb/run-20260225_094307-hmhb8ltr/files/wandb-metadata.json ADDED
@@ -0,0 +1,144 @@
+ {
+ "os": "Linux-3.10.0-957.el7.x86_64-x86_64-with-glibc2.17",
+ "python": "CPython 3.10.18",
+ "startedAt": "2026-02-25T01:43:07.009034Z",
+ "args": [
+ "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/resolved_train_config.json"
+ ],
+ "program": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py",
+ "codePath": "InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py",
+ "codePathLocal": "internvl_cleaned/train/internvl_chat_finetune.py",
+ "git": {
+ "remote": "git@github.com:Yuxin916/CL_CoTNav.git",
+ "commit": "1b65602e555ba143e86856602a3e22083cba01d4"
+ },
+ "email": "caiy0039@e.ntu.edu.sg",
+ "root": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY",
+ "host": "SH-IDC1-10-140-37-31",
+ "executable": "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/bin/python3.10",
+ "cpu_count": 64,
+ "cpu_count_logical": 128,
+ "gpu": "NVIDIA A100-SXM4-80GB",
+ "gpu_count": 8,
+ "disk": {
+ "/": {
+ "total": "524945911808",
+ "used": "39849480192"
+ }
+ },
+ "memory": {
+ "total": "1081627848704"
+ },
+ "gpu_nvidia": [
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-828b568b-c82d-b423-1a34-542c520b513a"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-5e4a67ae-0b1a-fef4-d35c-a727201c09bc"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-0a457f54-5e48-dcbb-8f0e-26fa26366156"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-27a5a9a9-701a-7f08-6cc8-898c967fc7d1"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-778a3a47-8b90-df02-6d77-59c305de90f4"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-d9f06099-a191-6157-a52f-43aa09175792"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-e9da788d-da37-714d-ab0e-c3d8824cf614"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-0c799f3d-966a-4baa-88cd-540cff322767"
+ }
+ ],
+ "cudaVersion": "12.2",
+ "slurm": {
+ "cluster_name": "cluster_sproject3",
+ "conf": "/etc/slurm/slurm.conf",
+ "cpus_on_node": "128",
+ "cpus_per_task": "8",
+ "distribution": "cyclic",
+ "gtids": "0",
+ "job_account": "research",
+ "job_cpus_per_node": "128",
+ "job_cpus_per_node_pack_group_0": "128",
+ "job_gid": "200000139",
+ "job_gpus": "0,1,2,3,4,5,6,7",
+ "job_id": "7465487",
+ "job_name": "vlm_ft",
+ "job_nodelist": "SH-IDC1-10-140-37-31",
+ "job_num_nodes": "1",
+ "job_partition": "interntmp",
+ "job_qos": "normal",
+ "job_uid": "200000139",
+ "job_user": "wangmaonan",
+ "jobid": "7465487",
+ "launch_node_ipaddr": "10.140.37.31",
+ "localid": "0",
+ "mem_per_node": "49152",
+ "nnodes": "1",
+ "node_aliases": "(null)",
+ "nodeid": "0",
+ "nodelist": "SH-IDC1-10-140-37-31",
+ "nprocs": "1",
+ "ntasks": "1",
+ "ntasks_per_node": "1",
+ "prio_process": "0",
+ "procid": "0",
+ "srun_comm_host": "10.140.37.31",
+ "srun_comm_port": "45814",
+ "step_gpus": "0,1,2,3",
+ "step_id": "0",
+ "step_launcher_port": "45814",
+ "step_nodelist": "SH-IDC1-10-140-37-31",
+ "step_num_nodes": "1",
+ "step_num_tasks": "1",
+ "step_tasks_per_node": "1",
+ "stepid": "0",
+ "submit_dir": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav",
+ "submit_host": "SH-IDC1-10-140-37-31",
+ "task_pid": "188600",
+ "tasks_per_node": "1",
+ "topology_addr": "SH-IDC1-10-140-37-31",
+ "topology_addr_pattern": "node",
+ "umask": "0002",
+ "working_cluster": "cluster_sproject3:SH-IDC1-10-140-37-161:6817:9216:109"
+ },
+ "writerId": "89yi1atsuhvihf32z0oklebiq6eazrep"
+ }
wandb/run-20260225_094307-hmhb8ltr/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
+ {"time":"2026-02-25T09:43:07.172368186+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9xle39pt/port-189155.txt","pid":189155,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+ {"time":"2026-02-25T09:43:07.17529052+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":189155}
+ {"time":"2026-02-25T09:43:07.175438935+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-189155-192110-2041067869/socket","Net":"unix"}}
+ {"time":"2026-02-25T09:43:07.249935403+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+ {"time":"2026-02-25T09:43:07.258572841+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"hmhb8ltr","id":"1(@)"}
+ {"time":"2026-02-25T09:43:07.683227166+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"hmhb8ltr","id":"1(@)"}
+ {"time":"2026-02-25T11:09:21.665063902+08:00","level":"INFO","msg":"server: parent process exited, terminating service process"}
wandb/run-20260225_094307-hmhb8ltr/logs/debug-internal.log ADDED
@@ -0,0 +1,14 @@
+ {"time":"2026-02-25T09:43:07.25942373+08:00","level":"INFO","msg":"stream: starting","core version":"0.22.3"}
+ {"time":"2026-02-25T09:43:07.67936989+08:00","level":"INFO","msg":"stream: created new stream","id":"hmhb8ltr"}
+ {"time":"2026-02-25T09:43:07.679921274+08:00","level":"INFO","msg":"handler: started","stream_id":"hmhb8ltr"}
+ {"time":"2026-02-25T09:43:07.682825049+08:00","level":"INFO","msg":"stream: started","id":"hmhb8ltr"}
+ {"time":"2026-02-25T09:43:07.682829933+08:00","level":"INFO","msg":"writer: started","stream_id":"hmhb8ltr"}
+ {"time":"2026-02-25T09:43:07.682846777+08:00","level":"INFO","msg":"sender: started","stream_id":"hmhb8ltr"}
+ {"time":"2026-02-25T10:08:08.481616394+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-25T10:08:10.152924176+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":1579}
+ {"time":"2026-02-25T10:08:34.648103405+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":29}
+ {"time":"2026-02-25T10:09:08.495965349+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-25T10:09:13.630775482+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":1657}
+ {"time":"2026-02-25T10:09:28.215288926+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":29}
+ {"time":"2026-02-25T10:46:53.782709329+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/tsaisplus-nanyang-technological-university-singapore/prompt_revision/hmhb8ltr/file_stream\": unexpected EOF"}
+ {"time":"2026-02-25T10:47:16.973717222+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/tsaisplus-nanyang-technological-university-singapore/prompt_revision/hmhb8ltr/file_stream\": unexpected EOF"}
wandb/run-20260225_094307-hmhb8ltr/logs/debug.log ADDED
@@ -0,0 +1,22 @@
+ 2026-02-25 09:43:07,025 INFO MainThread:189155 [wandb_setup.py:_flush():81] Current SDK version is 0.22.3
+ 2026-02-25 09:43:07,026 INFO MainThread:189155 [wandb_setup.py:_flush():81] Configure stats pid to 189155
+ 2026-02-25 09:43:07,027 INFO MainThread:189155 [wandb_setup.py:_flush():81] Loading settings from /mnt/petrelfs/wangmaonan/.config/wandb/settings
+ 2026-02-25 09:43:07,027 INFO MainThread:189155 [wandb_setup.py:_flush():81] Loading settings from /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/wandb/settings
+ 2026-02-25 09:43:07,028 INFO MainThread:189155 [wandb_setup.py:_flush():81] Loading settings from environment variables
+ 2026-02-25 09:43:07,028 INFO MainThread:189155 [wandb_init.py:setup_run_log_directory():706] Logging user logs to /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/wandb/run-20260225_094307-hmhb8ltr/logs/debug.log
+ 2026-02-25 09:43:07,029 INFO MainThread:189155 [wandb_init.py:setup_run_log_directory():707] Logging internal logs to /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/wandb/run-20260225_094307-hmhb8ltr/logs/debug-internal.log
+ 2026-02-25 09:43:07,029 INFO MainThread:189155 [wandb_init.py:init():833] calling init triggers
+ 2026-02-25 09:43:07,030 INFO MainThread:189155 [wandb_init.py:init():838] wandb.init called with sweep_config: {}
+ config: {'_wandb': {}}
+ 2026-02-25 09:43:07,030 INFO MainThread:189155 [wandb_init.py:init():881] starting backend
+ 2026-02-25 09:43:07,250 INFO MainThread:189155 [wandb_init.py:init():884] sending inform_init request
+ 2026-02-25 09:43:07,256 INFO MainThread:189155 [wandb_init.py:init():892] backend started and connected
+ 2026-02-25 09:43:07,257 INFO MainThread:189155 [wandb_init.py:init():962] updated telemetry
+ 2026-02-25 09:43:07,284 INFO MainThread:189155 [wandb_init.py:init():986] communicating run to backend with 90.0 second timeout
+ 2026-02-25 09:43:08,178 INFO MainThread:189155 [wandb_init.py:init():1033] starting run threads in backend
+ 2026-02-25 09:43:08,469 INFO MainThread:189155 [wandb_run.py:_console_start():2506] atexit reg
+ 2026-02-25 09:43:08,470 INFO MainThread:189155 [wandb_run.py:_redirect():2354] redirect: wrap_raw
+ 2026-02-25 09:43:08,471 INFO MainThread:189155 [wandb_run.py:_redirect():2423] Wrapping output streams.
+ 2026-02-25 09:43:08,471 INFO MainThread:189155 [wandb_run.py:_redirect():2446] Redirects installed.
+ 2026-02-25 09:43:08,477 INFO MainThread:189155 [wandb_init.py:init():1073] run started, returning control to user process
+ 2026-02-25 09:43:08,480 INFO MainThread:189155 [wandb_run.py:_config_callback():1390] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['InternVLChatModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '../pretrained/InternVL3-2B', '_commit_hash': None, '_attn_implementation_internal': None, 'transformers_version': None, 'auto_map': {'AutoConfig': 'configuration_internvl_chat.InternVLChatConfig', 'AutoModel': 'modeling_internvl_chat.InternVLChatModel', 'AutoModelForCausalLM': 'modeling_internvl_chat.InternVLChatModel'}, 'hidden_size': 1536, 'image_fold': None, 'model_type': 'internvl_chat', 'system_message': 'You are an autonomous navigation agent operating in indoor environments. You receive spatial information through position embeddings injected into visual features and text tokens. Use the BEV map, position embeddings, and semantic information to make navigation decisions. When the target object is detected (<target> marker), navigate directly to it. 
Otherwise, explore frontiers strategically to find the goal object.', 'vision_config': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': True, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['InternVisionModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'OpenGVLab/InternViT-6B-448px-V1-5', 'transformers_version': '4.37.2', '_attn_implementation_autoset': True, 'auto_map': {'AutoConfig': 'configuration_intern_vit.InternVisionConfig', 'AutoModel': 'modeling_intern_vit.InternVisionModel'}, 'capacity_factor': 1.2, 'eval_capacity_factor': 1.4, 'laux_allreduce': 'all_nodes', 'model_type': 'intern_vit_6b', 'moe_coeff_ratio': 0.5, 'moe_intermediate_size': 768, 'moe_output_scale': 4.0, 'noisy_gate_policy': 'RSample_before', 'num_experts': 8, 'num_routed_experts': 4, 'num_shared_experts': 4, 'shared_expert_intermediate_size': 3072, 'use_moe': False, 'use_residual': True, 'use_rts': False, 'use_weighted_residual': False, 'hidden_size': 1024, 'intermediate_size': 4096, 'dropout': 0.0, 'drop_path_rate': 0.0, 'num_hidden_layers': 24, 'num_attention_heads': 16, 'num_channels': 3, 'patch_size': 14, 'image_size': 448, 'initializer_range': 1e-10, 'initializer_factor': 0.1, 'attention_dropout': 0.0, 'layer_norm_eps': 1e-06, 'hidden_act': 'gelu', 'norm_type': 'layer_norm', 'qkv_bias': True, 'qk_normalization': False, 'use_flash_attn': True}, 'llm_config': {'vocab_size': 151677, 'max_position_embeddings': 32768, 'hidden_size': 1536, 'intermediate_size': 8960, 'num_hidden_layers': 28, 'num_attention_heads': 12, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 70, 'num_key_value_heads': 2, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'rope_theta': 1000000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': True, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 
'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 151643, 'pad_token_id': None, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './pretrained/Qwen2.5-32B-Instruct', 'transformers_version': '4.37.2', '_attn_implementation_autoset': True, 'model_type': 'qwen2', 'moe_config': None, 'rope_scaling': {'factor': 2.0, 'rope_type': 'dynamic', 'type': 'dynamic'}, 'attn_implementation': 'flash_attention_2'}, 'use_backbone_lora': 0, 'use_llm_lora': 64, 'pad2square': False, 'select_layer': -1, 'force_image_size': 448, 'downsample_ratio': 0.5, 'template': 'internvl2_5_nav', 'dynamic_image_size': False, 'use_thumbnail': True, 'ps_version': 'v2', 'min_dynamic_patch': 1, 'max_dynamic_patch': 12, 'num_image_token_bev': 256, 'num_image_token_ego': 32, 'use_pairwise_spatial_encoder': False, 'use_position_embeddings': True, 'dual_text_pos_injection': True, 'bev_image_size': 448, 'vit_bev_freeze': True, 'vit_bev_use_lora': True, 'vit_bev_lora_rank': 64, 'vit_rgb_freeze': True, 'vit_rgb_use_lora': True, 'vit_rgb_lora_rank': 16, 'output_dir': '/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': 9300, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'runs/Feb25_09-42-17_SH-IDC1-10-140-37-31', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.5, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 2, 'past_index': -1, 'run_name': 
'a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY_steps9300_gpus4_acc1', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': 'zero_stage2_config_acc1.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20260225_094307-hmhb8ltr/run-hmhb8ltr.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0beac92a32c1613762e91fc2beded5715934014b046b8dba88331bb08c80fa7
+ size 1966080
wandb/run-20260225_111518-pg2w7c3p/files/config.yaml ADDED
@@ -0,0 +1,767 @@
+ _attn_implementation_internal:
+ value: null
+ _commit_hash:
+ value: null
+ _name_or_path:
+ value: ../pretrained/InternVL3-2B
+ _wandb:
+ value:
+ cli_version: 0.22.3
+ e:
+ hpt2lvw45stdjl0sezw62w5lspi4bemx:
+ args:
+ - /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/resolved_train_config.json
+ codePath: InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py
+ codePathLocal: internvl_cleaned/train/internvl_chat_finetune.py
+ cpu_count: 64
+ cpu_count_logical: 128
+ cudaVersion: "12.2"
+ disk:
+ /:
+ total: "524945911808"
+ used: "33262657536"
+ email: caiy0039@e.ntu.edu.sg
+ executable: /mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/bin/python3.10
+ git:
+ commit: 003f6c01351c079c8c0476f53ac6212bc5573444
+ remote: git@github.com:Yuxin916/CL_CoTNav.git
+ gpu: NVIDIA A100-SXM4-80GB
+ gpu_count: 8
+ gpu_nvidia:
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-0360b5f7-c6ac-dc17-a431-8ebfcce4c66d
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-56e8ab97-6913-5234-1a79-ff5ea4dfa2db
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-1dca27df-e145-5a54-2a2f-f5b58257f3ee
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-92c24b01-1351-1c21-e267-73234c73be2f
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-2af0eb8b-44c0-74eb-b3d3-d5107ccb6aa1
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-89cf5df2-4951-2e7b-3ee7-3d0fdafc5d07
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-f20f8083-ef3d-ef3f-7c2f-3244a690c7b6
+ - architecture: Ampere
+ cudaCores: 6912
+ memoryTotal: "85899345920"
+ name: NVIDIA A100-SXM4-80GB
+ uuid: GPU-ab6cfaee-4d49-f6ee-3218-4f47bfeed414
+ host: SH-IDC1-10-140-37-90
+ memory:
+ total: "1081627832320"
+ os: Linux-3.10.0-957.el7.x86_64-x86_64-with-glibc2.17
+ program: /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py
+ python: CPython 3.10.18
+ root: /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY
+ slurm:
+ cluster_name: cluster_sproject3
+ conf: /etc/slurm/slurm.conf
+ cpus_on_node: "8"
+ cpus_per_task: "8"
+ distribution: cyclic
+ gtids: "0"
+ job_account: research
+ job_cpus_per_node: "8"
+ job_cpus_per_node_pack_group_0: "8"
+ job_gid: "200000139"
+ job_gpus: 0,1,2,3
+ job_id: "7466527"
+ job_name: vlm_ft
+ job_nodelist: SH-IDC1-10-140-37-90
+ job_num_nodes: "1"
+ job_partition: interntmp
+ job_qos: normal
+ job_uid: "200000139"
+ job_user: wangmaonan
+ jobid: "7466527"
+ launch_node_ipaddr: 10.140.37.90
+ localid: "0"
+ mem_per_node: "49152"
+ nnodes: "1"
+ node_aliases: (null)
+ nodeid: "0"
+ nodelist: SH-IDC1-10-140-37-90
+ nprocs: "1"
+ ntasks: "1"
+ ntasks_per_node: "1"
+ prio_process: "0"
+ procid: "0"
+ srun_comm_host: 10.140.37.90
+ srun_comm_port: "43483"
+ step_gpus: 0,1,2,3
+ step_id: "0"
+ step_launcher_port: "43483"
+ step_nodelist: SH-IDC1-10-140-37-90
+ step_num_nodes: "1"
+ step_num_tasks: "1"
+ step_tasks_per_node: "1"
+ stepid: "0"
+ submit_dir: /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav
+ submit_host: SH-IDC1-10-140-37-90
+ task_pid: "120843"
+ tasks_per_node: "1"
+ topology_addr: SH-IDC1-10-140-37-90
+ topology_addr_pattern: node
+ umask: "0002"
+ working_cluster: cluster_sproject3:SH-IDC1-10-140-37-161:6817:9216:109
+ startedAt: "2026-02-25T03:15:18.014369Z"
+ writerId: hpt2lvw45stdjl0sezw62w5lspi4bemx
+ m:
+ - "1": train/global_step
+ "6":
+ - 3
+ "7": []
+ - "2": '*'
+ "5": 1
+ "6":
+ - 1
+ "7": []
+ python_version: 3.10.18
+ t:
+ "1":
+ - 1
+ - 11
+ - 41
+ - 49
+ - 51
+ - 63
+ - 71
+ - 98
+ - 105
+ "2":
+ - 1
+ - 11
+ - 41
+ - 49
+ - 51
+ - 63
+ - 71
+ - 98
+ - 105
+ "3":
+ - 7
+ - 13
+ - 66
+ "4": 3.10.18
+ "5": 0.22.3
+ "6": 4.37.2
+ "9":
+ "1": transformers_trainer
+ "12": 0.22.3
+ "13": linux-x86_64
+ adafactor:
+ value: false
+ adam_beta1:
+ value: 0.9
+ adam_beta2:
+ value: 0.999
+ adam_epsilon:
+ value: 1e-08
+ add_cross_attention:
+ value: false
+ architectures:
+ value:
+ - InternVLChatModel
+ auto_find_batch_size:
+ value: false
+ auto_map:
+ value:
+ AutoConfig: configuration_internvl_chat.InternVLChatConfig
+ AutoModel: modeling_internvl_chat.InternVLChatModel
+ AutoModelForCausalLM: modeling_internvl_chat.InternVLChatModel
+ bad_words_ids:
+ value: null
+ begin_suppress_tokens:
+ value: null
+ bev_image_size:
+ value: 448
+ bf16:
+ value: true
+ bf16_full_eval:
+ value: false
+ bos_token_id:
+ value: null
+ chunk_size_feed_forward:
+ value: 0
+ cross_attention_hidden_size:
+ value: null
+ data_seed:
+ value: null
+ dataloader_drop_last:
+ value: false
+ dataloader_num_workers:
+ value: 2
+ dataloader_persistent_workers:
+ value: false
+ dataloader_pin_memory:
+ value: true
+ ddp_backend:
+ value: null
+ ddp_broadcast_buffers:
+ value: null
+ ddp_bucket_cap_mb:
+ value: null
+ ddp_find_unused_parameters:
+ value: null
+ ddp_timeout:
+ value: 1800
+ debug:
+ value: []
+ decoder_start_token_id:
+ value: null
+ deepspeed:
+ value: zero_stage2_config_acc1.json
+ disable_tqdm:
+ value: false
+ dispatch_batches:
+ value: null
+ diversity_penalty:
+ value: 0
+ do_eval:
+ value: false
+ do_predict:
+ value: false
+ do_sample:
+ value: false
+ do_train:
+ value: true
+ downsample_ratio:
+ value: 0.5
+ dual_text_pos_injection:
+ value: true
+ dynamic_image_size:
+ value: false
+ early_stopping:
+ value: false
+ encoder_no_repeat_ngram_size:
+ value: 0
+ eos_token_id:
+ value: null
+ eval_accumulation_steps:
+ value: null
+ eval_delay:
+ value: 0
+ eval_steps:
+ value: null
+ evaluation_strategy:
+ value: "no"
+ exponential_decay_length_penalty:
+ value: null
+ finetuning_task:
+ value: null
+ force_image_size:
+ value: 448
+ forced_bos_token_id:
+ value: null
+ forced_eos_token_id:
+ value: null
+ fp16:
+ value: false
+ fp16_backend:
+ value: auto
+ fp16_full_eval:
+ value: false
+ fp16_opt_level:
+ value: O1
+ fsdp:
+ value: []
+ fsdp_config:
+ value:
+ min_num_params: 0
+ xla: false
+ xla_fsdp_grad_ckpt: false
+ fsdp_min_num_params:
+ value: 0
+ fsdp_transformer_layer_cls_to_wrap:
+ value: null
+ full_determinism:
+ value: false
+ gradient_accumulation_steps:
+ value: 1
+ gradient_checkpointing:
+ value: true
+ gradient_checkpointing_kwargs:
+ value: null
+ greater_is_better:
+ value: null
+ group_by_length:
+ value: false
+ half_precision_backend:
+ value: auto
+ hidden_size:
+ value: 1536
+ hub_always_push:
+ value: false
+ hub_model_id:
+ value: null
+ hub_private_repo:
+ value: false
+ hub_strategy:
+ value: every_save
+ hub_token:
+ value: <HUB_TOKEN>
+ id2label:
+ value:
+ "0": LABEL_0
+ "1": LABEL_1
+ ignore_data_skip:
+ value: false
+ image_fold:
+ value: null
+ include_inputs_for_metrics:
+ value: false
+ include_num_input_tokens_seen:
+ value: false
+ include_tokens_per_second:
+ value: false
+ is_decoder:
+ value: false
+ is_encoder_decoder:
+ value: false
+ jit_mode_eval:
+ value: false
+ label_names:
+ value: null
+ label_smoothing_factor:
+ value: 0
+ label2id:
+ value:
+ LABEL_0: 0
+ LABEL_1: 1
+ learning_rate:
+ value: 0.0001
+ length_column_name:
+ value: length
+ length_penalty:
+ value: 1
+ llm_config:
+ value:
+ _attn_implementation_autoset: true
+ _name_or_path: ./pretrained/Qwen2.5-32B-Instruct
+ add_cross_attention: false
+ architectures:
+ - Qwen2ForCausalLM
+ attention_dropout: 0
+ attn_implementation: flash_attention_2
+ bad_words_ids: null
+ begin_suppress_tokens: null
+ bos_token_id: 151643
+ chunk_size_feed_forward: 0
+ cross_attention_hidden_size: null
+ decoder_start_token_id: null
+ diversity_penalty: 0
+ do_sample: false
+ early_stopping: false
+ encoder_no_repeat_ngram_size: 0
+ eos_token_id: 151643
+ exponential_decay_length_penalty: null
+ finetuning_task: null
+ forced_bos_token_id: null
+ forced_eos_token_id: null
+ hidden_act: silu
+ hidden_size: 1536
+ id2label:
+ "0": LABEL_0
+ "1": LABEL_1
+ initializer_range: 0.02
+ intermediate_size: 8960
+ is_decoder: false
+ is_encoder_decoder: false
+ label2id:
+ LABEL_0: 0
+ LABEL_1: 1
+ length_penalty: 1
+ max_length: 20
+ max_position_embeddings: 32768
+ max_window_layers: 70
+ min_length: 0
+ model_type: qwen2
+ moe_config: null
+ no_repeat_ngram_size: 0
+ num_attention_heads: 12
+ num_beam_groups: 1
+ num_beams: 1
+ num_hidden_layers: 28
+ num_key_value_heads: 2
+ num_return_sequences: 1
+ output_attentions: false
+ output_hidden_states: false
+ output_scores: false
+ pad_token_id: null
+ prefix: null
+ problem_type: null
+ remove_invalid_values: false
+ repetition_penalty: 1
+ return_dict: true
+ return_dict_in_generate: false
+ rms_norm_eps: 1e-06
+ rope_scaling:
+ factor: 2
+ rope_type: dynamic
+ type: dynamic
+ rope_theta: 1e+06
+ sep_token_id: null
+ sliding_window: null
+ suppress_tokens: null
+ task_specific_params: null
+ temperature: 1
+ tf_legacy_loss: false
+ tie_encoder_decoder: false
+ tie_word_embeddings: false
+ tokenizer_class: null
+ top_k: 50
+ top_p: 1
+ torch_dtype: bfloat16
+ torchscript: false
+ transformers_version: 4.37.2
+ typical_p: 1
+ use_bfloat16: true
+ use_cache: false
+ use_sliding_window: false
+ vocab_size: 151677
+ load_best_model_at_end:
+ value: false
+ local_rank:
+ value: 0
+ log_level:
+ value: passive
+ log_level_replica:
+ value: warning
+ log_on_each_node:
+ value: true
+ logging_dir:
+ value: runs/Feb25_11-14-26_SH-IDC1-10-140-37-90
+ logging_first_step:
+ value: false
+ logging_nan_inf_filter:
+ value: true
+ logging_steps:
+ value: 1
+ logging_strategy:
+ value: steps
+ lr_scheduler_type:
+ value: cosine
+ max_dynamic_patch:
+ value: 12
+ max_grad_norm:
+ value: 1
+ max_length:
+ value: 20
+ max_steps:
+ value: 9300
+ metric_for_best_model:
+ value: null
+ min_dynamic_patch:
+ value: 1
+ min_length:
+ value: 0
+ model_type:
+ value: internvl_chat
+ mp_parameters:
+ value: ""
+ neftune_noise_alpha:
+ value: null
+ no_cuda:
+ value: false
+ no_repeat_ngram_size:
+ value: 0
+ num_beam_groups:
+ value: 1
+ num_beams:
+ value: 1
+ num_image_token_bev:
+ value: 256
+ num_image_token_ego:
+ value: 32
+ num_return_sequences:
+ value: 1
+ num_train_epochs:
+ value: 1
+ optim:
+ value: adamw_torch
+ optim_args:
+ value: null
+ output_attentions:
+ value: false
+ output_dir:
+ value: /mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY
+ output_hidden_states:
+ value: false
+ output_scores:
+ value: false
+ overwrite_output_dir:
+ value: true
+ pad_token_id:
+ value: null
+ pad2square:
+ value: false
+ past_index:
+ value: -1
+ per_device_eval_batch_size:
+ value: 8
+ per_device_train_batch_size:
+ value: 1
+ per_gpu_eval_batch_size:
+ value: null
+ per_gpu_train_batch_size:
+ value: null
+ prediction_loss_only:
+ value: false
+ prefix:
+ value: null
+ problem_type:
+ value: null
+ ps_version:
+ value: v2
+ push_to_hub:
+ value: false
+ push_to_hub_model_id:
+ value: null
+ push_to_hub_organization:
+ value: null
+ push_to_hub_token:
+ value: <PUSH_TO_HUB_TOKEN>
+ ray_scope:
+ value: last
+ remove_invalid_values:
+ value: false
+ remove_unused_columns:
+ value: false
+ repetition_penalty:
+ value: 1
+ report_to:
+ value:
+ - wandb
+ resume_from_checkpoint:
+ value: null
+ return_dict:
+ value: true
+ return_dict_in_generate:
+ value: false
+ run_name:
+ value: a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY_steps9300_gpus4_acc1
+ save_on_each_node:
+ value: false
+ save_only_model:
+ value: false
+ save_safetensors:
+ value: true
+ save_steps:
+ value: 0.5
+ save_strategy:
+ value: steps
+ save_total_limit:
+ value: 1
+ seed:
+ value: 42
+ select_layer:
+ value: -1
+ sep_token_id:
+ value: null
+ skip_memory_metrics:
+ value: true
+ split_batches:
+ value: false
+ suppress_tokens:
+ value: null
+ system_message:
+ value: You are an autonomous navigation agent operating in indoor environments. You receive spatial information through position embeddings injected into visual features and text tokens. Use the BEV map, position embeddings, and semantic information to make navigation decisions. When the target object is detected (<target> marker), navigate directly to it. Otherwise, explore frontiers strategically to find the goal object.
+ task_specific_params:
+ value: null
+ temperature:
+ value: 1
+ template:
+ value: internvl2_5_nav
+ tf_legacy_loss:
+ value: false
+ tf32:
+ value: null
+ tie_encoder_decoder:
+ value: false
+ tie_word_embeddings:
+ value: false
+ tokenizer_class:
+ value: null
+ top_k:
+ value: 50
+ top_p:
+ value: 1
+ torch_compile:
+ value: false
+ torch_compile_backend:
+ value: null
+ torch_compile_mode:
+ value: null
+ torch_dtype:
+ value: torch.bfloat16
+ torchdynamo:
+ value: null
+ torchscript:
+ value: false
+ tpu_metrics_debug:
+ value: false
+ tpu_num_cores:
+ value: null
+ transformers_version:
+ value: null
+ typical_p:
+ value: 1
+ use_backbone_lora:
+ value: 0
+ use_bfloat16:
+ value: false
+ use_cpu:
+ value: false
+ use_ipex:
+ value: false
+ use_legacy_prediction_loop:
+ value: false
+ use_llm_lora:
+ value: 64
+ use_mps_device:
+ value: false
+ use_pairwise_spatial_encoder:
+ value: false
+ use_position_embeddings:
+ value: true
+ use_thumbnail:
+ value: true
+ vision_config:
+ value:
+ _attn_implementation_autoset: true
+ _name_or_path: OpenGVLab/InternViT-6B-448px-V1-5
+ add_cross_attention: false
+ architectures:
+ - InternVisionModel
+ attention_dropout: 0
+ auto_map:
+ AutoConfig: configuration_intern_vit.InternVisionConfig
+ AutoModel: modeling_intern_vit.InternVisionModel
+ bad_words_ids: null
+ begin_suppress_tokens: null
+ bos_token_id: null
+ capacity_factor: 1.2
+ chunk_size_feed_forward: 0
+ cross_attention_hidden_size: null
+ decoder_start_token_id: null
+ diversity_penalty: 0
+ do_sample: false
+ drop_path_rate: 0
+ dropout: 0
+ early_stopping: false
+ encoder_no_repeat_ngram_size: 0
+ eos_token_id: null
+ eval_capacity_factor: 1.4
+ exponential_decay_length_penalty: null
+ finetuning_task: null
+ forced_bos_token_id: null
+ forced_eos_token_id: null
+ hidden_act: gelu
+ hidden_size: 1024
+ id2label:
+ "0": LABEL_0
+ "1": LABEL_1
+ image_size: 448
+ initializer_factor: 0.1
+ initializer_range: 1e-10
+ intermediate_size: 4096
+ is_decoder: false
+ is_encoder_decoder: false
+ label2id:
+ LABEL_0: 0
+ LABEL_1: 1
+ laux_allreduce: all_nodes
+ layer_norm_eps: 1e-06
+ length_penalty: 1
+ max_length: 20
+ min_length: 0
+ model_type: intern_vit_6b
+ moe_coeff_ratio: 0.5
+ moe_intermediate_size: 768
+ moe_output_scale: 4
+ no_repeat_ngram_size: 0
+ noisy_gate_policy: RSample_before
+ norm_type: layer_norm
+ num_attention_heads: 16
+ num_beam_groups: 1
+ num_beams: 1
+ num_channels: 3
+ num_experts: 8
+ num_hidden_layers: 24
+ num_return_sequences: 1
+ num_routed_experts: 4
+ num_shared_experts: 4
+ output_attentions: false
+ output_hidden_states: false
+ output_scores: false
+ pad_token_id: null
+ patch_size: 14
+ prefix: null
+ problem_type: null
+ qk_normalization: false
+ qkv_bias: true
+ remove_invalid_values: false
+ repetition_penalty: 1
+ return_dict: true
+ return_dict_in_generate: false
+ sep_token_id: null
+ shared_expert_intermediate_size: 3072
+ suppress_tokens: null
+ task_specific_params: null
+ temperature: 1
+ tf_legacy_loss: false
+ tie_encoder_decoder: false
+ tie_word_embeddings: true
+ tokenizer_class: null
+ top_k: 50
+ top_p: 1
+ torch_dtype: bfloat16
+ torchscript: false
+ transformers_version: 4.37.2
+ typical_p: 1
+ use_bfloat16: true
+ use_flash_attn: true
+ use_moe: false
+ use_residual: true
+ use_rts: false
+ use_weighted_residual: false
+ vit_bev_freeze:
+ value: true
+ vit_bev_lora_rank:
+ value: 64
+ vit_bev_use_lora:
+ value: true
+ vit_rgb_freeze:
+ value: true
+ vit_rgb_lora_rank:
+ value: 16
+ vit_rgb_use_lora:
+ value: true
+ warmup_ratio:
+ value: 0.03
+ warmup_steps:
+ value: 0
+ weight_decay:
+ value: 0.01
wandb/run-20260225_111518-pg2w7c3p/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20260225_111518-pg2w7c3p/files/requirements.txt ADDED
@@ -0,0 +1,113 @@
+ nvidia-cuda-runtime-cu12==12.1.105
+ platformdirs==4.9.2
+ wandb==0.22.3
+ transformers==4.37.2
+ einops==0.8.2
+ accelerate==0.33.0
+ aiosignal==1.4.0
+ huggingface_hub==0.36.2
+ pycparser==2.22
+ multidict==6.7.1
+ urllib3==2.6.3
+ urllib3==2.5.0
+ aiohappyeyeballs==2.6.1
+ nvidia-cusolver-cu12==11.4.5.107
+ yarl==1.22.0
+ typing-inspection==0.4.2
+ gitdb==4.0.12
+ multiprocess==0.70.16
+ async-timeout==5.0.1
+ Jinja2==3.1.6
+ setuptools==82.0.0
+ torch==2.5.1+cu121
+ hyperframe==6.1.0
+ peft==0.10.0
+ tzdata==2025.3
+ dill==0.3.8
+ orjson==3.11.7
+ nvidia-cufft-cu12==11.0.2.54
+ propcache==0.4.1
+ PySocks==1.7.1
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-nccl-cu12==2.21.5
+ docker-pycreds==0.4.0
+ pydantic_core==2.41.5
+ nvidia-ml-py==13.590.48
+ ninja==1.13.0
+ cffi==1.15.0
+ nvidia-cuda-cupti-cu12==12.1.105
+ triton==3.1.0
+ annotated-types==0.7.0
+ nvidia-nvtx-cu12==12.1.105
+ timm==1.0.22
+ sympy==1.13.1
+ pydantic==2.12.5
+ xxhash==3.6.0
+ py-cpuinfo==9.0.0
+ sentry-sdk==2.53.0
+ networkx==3.4.2
+ click==8.3.1
+ regex==2026.1.15
+ pillow==12.0.0
+ zstandard==0.23.0
+ pyarrow==20.0.0
+ GitPython==3.1.46
+ wheel==0.46.3
+ fsspec==2025.3.0
+ typing_extensions==4.15.0
+ hf-xet==1.2.0
+ torchvision==0.20.1+cu121
+ python-dateutil==2.9.0.post0
+ numpy==1.26.4
+ smmap==5.0.2
+ setproctitle==1.2.3
+ safetensors==0.7.0
+ requests==2.32.5
+ hpack==4.1.0
+ six==1.17.0
+ modelscope==1.34.0
+ pip==26.0.1
+ h2==4.3.0
+ PyYAML==6.0.3
+ PyYAML==6.0
+ torchaudio==2.5.1+cu121
+ nvidia-nvjitlink-cu12==12.9.86
+ psutil==7.2.2
+ psutil==5.9.1
+ pandas==2.3.2
+ pytz==2025.2
+ nvidia-cudnn-cu12==9.1.0.70
+ packaging==25.0
+ hjson==3.1.0
+ deepspeed==0.14.4
+ datasets==4.0.0
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ tokenizers==0.15.2
+ MarkupSafe==2.1.5
+ charset-normalizer==3.4.4
+ frozenlist==1.8.0
+ Brotli==1.0.9
+ certifi==2026.1.4
+ mpmath==1.3.0
+ protobuf==3.20.1
+ tqdm==4.67.3
+ nvidia-cusparse-cu12==12.1.0.106
+ attrs==25.4.0
+ appdirs==1.4.4
+ flash_attn==2.7.4.post1
+ aiohttp==3.13.3
+ filelock==3.20.0
+ idna==3.11
+ nvidia-curand-cu12==10.3.2.106
+ zipp==3.23.0
+ backports.tarfile==1.2.0
+ autocommand==2.2.2
+ platformdirs==4.4.0
+ jaraco.text==4.0.0
+ more-itertools==10.8.0
+ jaraco.context==6.1.0
+ jaraco.functools==4.4.0
+ wheel==0.46.3
+ tomli==2.4.0
+ packaging==26.0
+ importlib_metadata==8.7.1
wandb/run-20260225_111518-pg2w7c3p/files/wandb-metadata.json ADDED
@@ -0,0 +1,144 @@
+ {
+ "os": "Linux-3.10.0-957.el7.x86_64-x86_64-with-glibc2.17",
+ "python": "CPython 3.10.18",
+ "startedAt": "2026-02-25T03:15:18.014369Z",
+ "args": [
+ "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY/resolved_train_config.json"
+ ],
+ "program": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py",
+ "codePath": "InternVL_cleaned/internvl_chat/internvl_cleaned/train/internvl_chat_finetune.py",
+ "codePathLocal": "internvl_cleaned/train/internvl_chat_finetune.py",
+ "git": {
+ "remote": "git@github.com:Yuxin916/CL_CoTNav.git",
+ "commit": "003f6c01351c079c8c0476f53ac6212bc5573444"
+ },
+ "email": "caiy0039@e.ntu.edu.sg",
+ "root": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav/all_log/experiments/a100_dualvit_llm-64_mlp-train-patch-32768-acc1_BEVftFOV_FrontierRGB_PosB__FRONTIER_PIXEL_NUMBER_ONLY",
+ "host": "SH-IDC1-10-140-37-90",
+ "executable": "/mnt/petrelfs/wangmaonan/anaconda3/envs/cl_cotnav/bin/python3.10",
+ "cpu_count": 64,
+ "cpu_count_logical": 128,
+ "gpu": "NVIDIA A100-SXM4-80GB",
+ "gpu_count": 8,
+ "disk": {
+ "/": {
+ "total": "524945911808",
+ "used": "33262657536"
+ }
+ },
+ "memory": {
+ "total": "1081627832320"
+ },
+ "gpu_nvidia": [
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-0360b5f7-c6ac-dc17-a431-8ebfcce4c66d"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-56e8ab97-6913-5234-1a79-ff5ea4dfa2db"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-1dca27df-e145-5a54-2a2f-f5b58257f3ee"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-92c24b01-1351-1c21-e267-73234c73be2f"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-2af0eb8b-44c0-74eb-b3d3-d5107ccb6aa1"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-89cf5df2-4951-2e7b-3ee7-3d0fdafc5d07"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-f20f8083-ef3d-ef3f-7c2f-3244a690c7b6"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere",
+ "uuid": "GPU-ab6cfaee-4d49-f6ee-3218-4f47bfeed414"
+ }
+ ],
+ "cudaVersion": "12.2",
+ "slurm": {
+ "cluster_name": "cluster_sproject3",
+ "conf": "/etc/slurm/slurm.conf",
+ "cpus_on_node": "8",
+ "cpus_per_task": "8",
+ "distribution": "cyclic",
+ "gtids": "0",
+ "job_account": "research",
+ "job_cpus_per_node": "8",
+ "job_cpus_per_node_pack_group_0": "8",
+ "job_gid": "200000139",
+ "job_gpus": "0,1,2,3",
+ "job_id": "7466527",
+ "job_name": "vlm_ft",
+ "job_nodelist": "SH-IDC1-10-140-37-90",
+ "job_num_nodes": "1",
+ "job_partition": "interntmp",
+ "job_qos": "normal",
+ "job_uid": "200000139",
+ "job_user": "wangmaonan",
+ "jobid": "7466527",
+ "launch_node_ipaddr": "10.140.37.90",
+ "localid": "0",
+ "mem_per_node": "49152",
+ "nnodes": "1",
+ "node_aliases": "(null)",
+ "nodeid": "0",
+ "nodelist": "SH-IDC1-10-140-37-90",
+ "nprocs": "1",
+ "ntasks": "1",
+ "ntasks_per_node": "1",
+ "prio_process": "0",
+ "procid": "0",
+ "srun_comm_host": "10.140.37.90",
+ "srun_comm_port": "43483",
+ "step_gpus": "0,1,2,3",
+ "step_id": "0",
+ "step_launcher_port": "43483",
+ "step_nodelist": "SH-IDC1-10-140-37-90",
+ "step_num_nodes": "1",
+ "step_num_tasks": "1",
+ "step_tasks_per_node": "1",
+ "stepid": "0",
+ "submit_dir": "/mnt/petrelfs/wangmaonan/yuxin/CL_CoTNav",
+ "submit_host": "SH-IDC1-10-140-37-90",
+ "task_pid": "120843",
+ "tasks_per_node": "1",
+ "topology_addr": "SH-IDC1-10-140-37-90",
+ "topology_addr_pattern": "node",
+ "umask": "0002",
+ "working_cluster": "cluster_sproject3:SH-IDC1-10-140-37-161:6817:9216:109"
+ },
+ "writerId": "hpt2lvw45stdjl0sezw62w5lspi4bemx"
+ }
wandb/run-20260225_111518-pg2w7c3p/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"train/train_runtime":117088.0105,"train/loss":0.1976,"train/global_step":9300,"_runtime":117087,"_timestamp":1.7721064053506212e+09,"train/learning_rate":0,"train/epoch":1,"train/train_steps_per_second":0.079,"train/total_flos":8.206174795797096e+22,"_wandb":{"runtime":117087},"train/train_loss":0.3191605252492171,"_step":9300,"train/train_samples_per_second":0.318}
wandb/run-20260225_111518-pg2w7c3p/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
+ {"time":"2026-02-25T11:15:18.181861838+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpx01znwkb/port-121696.txt","pid":121696,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+ {"time":"2026-02-25T11:15:18.184202356+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":121696}
+ {"time":"2026-02-25T11:15:18.184392132+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-121696-125729-3028696784/socket","Net":"unix"}}
+ {"time":"2026-02-25T11:15:18.255635297+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+ {"time":"2026-02-25T11:15:18.265170365+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"pg2w7c3p","id":"1(@)"}
+ {"time":"2026-02-25T11:15:23.029413819+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pg2w7c3p","id":"1(@)"}
+ {"time":"2026-02-26T19:46:56.115178582+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+ {"time":"2026-02-26T19:46:56.115812331+08:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
+ {"time":"2026-02-26T19:46:56.116231092+08:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+ {"time":"2026-02-26T19:46:56.115821395+08:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2026-02-26T19:46:56.117104362+08:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-121696-125729-3028696784/socket","Net":"unix"}}
+ {"time":"2026-02-26T19:47:02.314061774+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+ {"time":"2026-02-26T19:47:02.314530487+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+ {"time":"2026-02-26T19:47:02.31490045+08:00","level":"INFO","msg":"server is closed"}
wandb/run-20260225_111518-pg2w7c3p/logs/debug-internal.log ADDED
@@ -0,0 +1,29 @@
+ {"time":"2026-02-25T11:15:18.266363836+08:00","level":"INFO","msg":"stream: starting","core version":"0.22.3"}
+ {"time":"2026-02-25T11:15:23.025454392+08:00","level":"INFO","msg":"stream: created new stream","id":"pg2w7c3p"}
+ {"time":"2026-02-25T11:15:23.025989355+08:00","level":"INFO","msg":"handler: started","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-25T11:15:23.029061332+08:00","level":"INFO","msg":"stream: started","id":"pg2w7c3p"}
+ {"time":"2026-02-25T11:15:23.029075662+08:00","level":"INFO","msg":"sender: started","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-25T11:15:23.02907461+08:00","level":"INFO","msg":"writer: started","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-25T15:48:54.519398391+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/tsaisplus-nanyang-technological-university-singapore/prompt_revision/pg2w7c3p/file_stream\": EOF"}
+ {"time":"2026-02-25T21:07:59.713263866+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": unexpected EOF"}
+ {"time":"2026-02-26T01:46:00.156791646+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":64931}
+ {"time":"2026-02-26T01:46:01.26570749+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T01:46:04.155088853+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T02:27:44.815648016+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":68065}
+ {"time":"2026-02-26T02:27:46.348757133+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T02:27:49.049539083+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T03:22:31.469120504+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T03:22:33.406774848+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":72171}
+ {"time":"2026-02-26T03:22:34.088783654+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T08:26:46.733939909+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T08:26:53.80398635+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":95045}
+ {"time":"2026-02-26T08:26:57.006489641+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T09:11:42.493210656+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/tsaisplus-nanyang-technological-university-singapore/prompt_revision/pg2w7c3p/file_stream\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T12:21:30.540150918+08:00","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":112659}
+ {"time":"2026-02-26T12:21:31.920394554+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+ {"time":"2026-02-26T12:21:34.277988733+08:00","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":13}
+ {"time":"2026-02-26T19:46:56.115836343+08:00","level":"INFO","msg":"stream: closing","id":"pg2w7c3p"}
+ {"time":"2026-02-26T19:47:01.871500678+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2026-02-26T19:47:02.310370252+08:00","level":"INFO","msg":"handler: closed","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-26T19:47:02.312032251+08:00","level":"INFO","msg":"sender: closed","stream_id":"pg2w7c3p"}
+ {"time":"2026-02-26T19:47:02.312485027+08:00","level":"INFO","msg":"stream: closed","id":"pg2w7c3p"}