(sigma_vla) root@C.28229820:/workspace$ python /workspace/train_sigma_telepathy_vla_lora.py \
--base_model_id "lerobot/pi05_base" \
--data_dir "/workspace/storage/sigma_pickplace" \
--output_dir "/workspace/storage/sigma_lora_out" \
--torch_dtype bf16 \
--load_in_4bit \
--batch_size 2 \
--grad_accum 8 \
--epochs 3 \
--hard_mining_ratio 0.3 \
--hard_mining_lambda 1.0
[WARN] Base model lerobot/pi05_base is not a standard Transformers CausalLM. Telepathy training will run without LoRA on the base model. Error=ValueError('Unrecognized model in lerobot/pi05_base. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: albert, align, altclip, audio-spectrogram-transformer, autoformer, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, blenderbot, blenderbot-small, blip, blip-2, bloom, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, conditional_detr, convbert, convnext, convnextv2, cpmant, ctrl, cvt, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deformable_detr, deit, depth_anything, deta, detr, dinat, dinov2, distilbert, donut-swin, dpr, dpt, efficientformer, efficientnet, electra, encodec, encoder-decoder, ernie, ernie_m, esm, falcon, fastspeech2_conformer, flaubert, flava, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, git, glpn, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gptj, gptsan-japanese, graphormer, grounding-dino, groupvit, hiera, hubert, ibert, idefics, idefics2, imagegpt, informer, instructblip, instructblipvideo, jamba, jetmoe, jukebox, kosmos-2, layoutlm, layoutlmv2, layoutlmv3, led, levit, lilt, llama, llava, llava-next-video, llava_next, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, mgp-str, mistral, mixtral, mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, oneformer, open-llama, openai-gpt, opt, owlv2, owlvit, paligemma, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, persimmon, phi, phi3, pix2struct, plbart, poolformer, pop2piano, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_moe, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rwkv, sam, seamless_m4t, seamless_m4t_v2, segformer, seggpt, sew, sew-d, siglip, siglip_vision_model, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, splinter, squeezebert, stablelm, starcoder2, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, table-transformer, tapas, time_series_transformer, timesformer, timm_backbone, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vits, vivit, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xglm, xlm, xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xmod, yolos, yoso, zoedepth')
/venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
warnings.warn(
storage/sigma_pickplace/shard_00000.pt: 100%|████████████████████████████████████████| 15.1G/15.1G [00:09<00:00, 1.62GB/s]
storage/sigma_pickplace/shard_00002.pt: 100%|████████████████████████████████████████| 12.4G/12.4G [00:10<00:00, 1.18GB/s]
storage/sigma_pickplace/shard_00001.pt: 100%|████████████████████████████████████████| 15.1G/15.1G [00:11<00:00, 1.29GB/s]
Fetching 3 files: 100%|████████████████████████████████████████| 3/3 [00:12<00:00, 4.02s/it]
[INFO] Loaded HF shards from Veltraxor/Sigma/storage/sigma_pickplace
epoch=0 step=0 gstep=0 loss=1086.9083 L_act=1086.8982 L_sem=0.0693 L_int=0.0323 L_tau=0.1718 L_act_hard=618.7102 w_sem=0.100 w_int=0.100 w_tau=0.000 hard_ratio=0.30 tau_rms=0.048737
epoch=0 step=10 gstep=10 loss=1914.0575 L_act=1914.0359 L_sem=0.0693 L_int=0.0346 L_tau=0.1711 L_act_hard=1141.1694 w_sem=0.210 w_int=0.185 w_tau=0.004 hard_ratio=0.30 tau_rms=0.048624
epoch=0 step=20 gstep=20 loss=1308.3302 L_act=1308.3080 L_sem=0.0693 L_int=-0.0042 L_tau=0.1691 L_act_hard=716.4774 w_sem=0.320 w_int=0.271 w_tau=0.007 hard_ratio=0.30 tau_rms=0.043972
epoch=0 step=30 gstep=30 loss=1033.4258 L_act=1033.4686 L_sem=0.0693 L_int=-0.2089 L_tau=0.1557 L_act_hard=573.1190 w_sem=0.429 w_int=0.356 w_tau=0.011 hard_ratio=0.30 tau_rms=0.054172
epoch=0 step=40 gstep=40 loss=1838.2953 L_act=1838.5004 L_sem=0.0734 L_int=-0.5576 L_tau=0.1124 L_act_hard=1100.0006 w_sem=0.539 w_int=0.441 w_tau=0.015 hard_ratio=0.30 tau_rms=0.145681
epoch=0 step=50 gstep=50 loss=1103.2909 L_act=1103.5840 L_sem=0.0693 L_int=-0.6446 L_tau=0.0896 L_act_hard=580.7323 w_sem=0.649 w_int=0.527 w_tau=0.018 hard_ratio=0.30 tau_rms=0.241012
epoch=0 step=60 gstep=60 loss=1426.9183 L_act=1427.2749 L_sem=0.0693 L_int=-0.6710 L_tau=0.0718 L_act_hard=748.6382 w_sem=0.759 w_int=0.612 w_tau=0.022 hard_ratio=0.30 tau_rms=0.380592
epoch=0 step=70 gstep=70 loss=2036.9558 L_act=2037.3759 L_sem=0.0693 L_int=-0.6906 L_tau=0.0582 L_act_hard=1181.5132 w_sem=0.868 w_int=0.698 w_tau=0.026 hard_ratio=0.30 tau_rms=0.565631
epoch=0 step=80 gstep=80 loss=2080.1997 L_act=2080.6604 L_sem=0.1175 L_int=-0.7369 L_tau=0.0453 L_act_hard=1148.7366 w_sem=0.978 w_int=0.783 w_tau=0.029 hard_ratio=0.30 tau_rms=1.075312
epoch=0 step=90 gstep=90 loss=2319.5828 L_act=2320.0923 L_sem=0.0693 L_int=-0.7254 L_tau=0.0488 L_act_hard=1162.6475 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=1.434653
epoch=0 step=100 gstep=100 loss=1975.6973 L_act=1976.1826 L_sem=0.0693 L_int=-0.6956 L_tau=0.0607 L_act_hard=1133.2614 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=1.898025
epoch=0 step=110 gstep=110 loss=1112.9913 L_act=1113.4468 L_sem=0.0693 L_int=-0.6592 L_tau=0.0848 L_act_hard=585.0326 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=2.502877
epoch=0 step=120 gstep=120 loss=1058.9313 L_act=1058.8882 L_sem=0.5253 L_int=-0.6104 L_tau=0.2024 L_act_hard=535.2509 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=4.292017
epoch=0 step=130 gstep=130 loss=1669.4806 L_act=1669.8701 L_sem=0.0693 L_int=-0.5863 L_tau=0.3365 L_act_hard=835.4255 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=5.652924
Saved sigma Telepathy heads and (if available) LoRA adapter.
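
The per-step lines above are enough to reconstruct how the total loss is assembled: at every logged step, loss = L_act + w_sem*L_sem + w_int*L_int + w_tau*L_tau to within rounding, with the auxiliary weights ramping linearly from (0.1, 0.1, 0.0) at step 0 to their caps of (1.0, 0.8, 0.03) somewhere around step 80-90. The sketch below reproduces that arithmetic; the warmup length and the function names are inferred from the log, not taken from train_sigma_telepathy_vla_lora.py.

# Hedged reconstruction of the logged loss composition. The warmup length
# (~82 optimizer steps) is a fit to the printed w_sem/w_int/w_tau ramp and
# is an assumption, as are the function names.
def aux_weights(gstep: int, warmup_steps: int = 82):
    frac = min(gstep / warmup_steps, 1.0)
    w_sem = 0.1 + 0.9 * frac   # 0.1 -> 1.0
    w_int = 0.1 + 0.7 * frac   # 0.1 -> 0.8
    w_tau = 0.03 * frac        # 0.0 -> 0.03
    return w_sem, w_int, w_tau

def total_loss(l_act, l_sem, l_int, l_tau, gstep):
    w_sem, w_int, w_tau = aux_weights(gstep)
    return l_act + w_sem * l_sem + w_int * l_int + w_tau * l_tau

# Sanity check against the step-90 line (weights are at their caps there):
# 2320.0923 + 1.0*0.0693 + 0.8*(-0.7254) + 0.03*0.0488 ≈ 2319.583
print(total_loss(2320.0923, 0.0693, -0.7254, 0.0488, gstep=90))
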
(sigma_vla) root@C.28229820:/workspace$ cd /workspace
python eval_sigma_vla_rollout.py \
--base_model_id "lerobot/pi05_base" \
--tokenizer_id "google/paligemma-3b-pt-224" \
--artifacts_repo_id "Veltraxor/Sigma" \
--output_dir "/workspace/storage/sigma_eval_out_telepathy_full" \
--batch_size 4 \
--num_workers 2 \
--dtype bf16
/venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`.
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
warnings.warn(
meta.json: 100%|████████████████████████████████████████| 505/505 [00:00<00:00, 3.39MB/s]
train_args.json: 1.15kB [00:00, 3.67MB/s]
storage/sigma_lora_out/sigma_telepathy_h(…): 100%|████████████████████████████████████████| 589M/589M [00:02<00:00, 207MB/s]
storage/sigma_pickplace/shard_00000.pt: 100%|████████████████████████████████████████| 15.1G/15.1G [00:09<00:00, 1.63GB/s]
storage/sigma_pickplace/shard_00002.pt: 100%|████████████████████████████████████████| 12.4G/12.4G [00:11<00:00, 1.08GB/s]
storage/sigma_pickplace/shard_00001.pt: 100%|████████████████████████████████████████| 15.1G/15.1G [00:12<00:00, 1.20GB/s]
Fetching 6 files: 100%|████████████████████████████████████████| 6/6 [00:12<00:00, 2.16s/it]
[INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace
[INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt
/venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
warnings.warn(
WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so')
WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
[policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback):
No module named 'triton.ops'
The PI05 model is a direct port of the OpenPI implementation.
This implementation follows the original OpenPI structure for compatibility.
Original implementation: https://github.com/Physical-Intelligence/openpi
config.json: 1.90kB [00:00, 8.07MB/s]
WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'.
WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'.
/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead.
warnings.warn(
WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py)
Loading model from: lerobot/pi05_base
model.safetensors: 100%|████████████████████████████████████████| 14.5G/14.5G [00:07<00:00, 1.85GB/s]
✓ Loaded state dict from model.safetensors
WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias
WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight
Remapped: action_in_proj.bias -> model.action_in_proj.bias
Remapped: action_in_proj.weight -> model.action_in_proj.weight
Remapped: action_out_proj.bias -> model.action_out_proj.bias
Remapped: action_out_proj.weight -> model.action_out_proj.weight
Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight
Remapped 812 state dict keys
[patch_pi05] Could not tie embed_tokens to lm_head: 'PaliGemmaForConditionalGeneration' object has no attribute 'model'
Warning: Could not remap state dict keys: 'PaliGemmaForConditionalGeneration' object has no attribute 'model'
tokenizer_config.json: 100%|████████████████████████████████████████| 40.0k/40.0k [00:00<00:00, 127MB/s]
tokenizer.model: 100%|████████████████████████████████████████| 4.26M/4.26M [00:00<00:00, 11.6MB/s]
tokenizer.json: 100%|████████████████████████████████████████| 17.5M/17.5M [00:00<00:00, 43.6MB/s]
added_tokens.json: 100%|████████████████████████████████████████| 24.0/24.0 [00:00<00:00, 157kB/s]
special_tokens_map.json: 100%|████████████████████████████████████████| 607/607 [00:00<00:00, 1.75MB/s]
/venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
warnings.warn(
[CHECK-A] disable_telepathy=False
[CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB
[CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970
[CHECK-A] heads fully matched (no missing/unexpected).
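
The [CHECK-A] block summarizes the telepathy-heads checkpoint before the rollout: number of tensors, global mean/std/RMS of the parameters, and a strict key-matching check against the heads module. A minimal sketch of that kind of verification follows, assuming the checkpoint is a flat state dict saved with torch.save; the helper is illustrative rather than the eval script's actual code.

# Illustrative recomputation of the [CHECK-A] statistics, assuming
# sigma_telepathy_heads.pt is a plain state dict of tensors.
import torch

def summarize_heads(path: str):
    state = torch.load(path, map_location="cpu")
    flat = torch.cat([t.float().flatten() for t in state.values()])
    print(f"heads_tensors={len(state)} mean={flat.mean():.6f} "
          f"std={flat.std():.6f} rms={flat.pow(2).mean().sqrt():.6f}")
    return state

# "heads fully matched (no missing/unexpected)" corresponds to an empty
# result from a non-strict load, e.g.:
# incompat = telepathy_heads.load_state_dict(summarize_heads(path), strict=False)
# assert not incompat.missing_keys and not incompat.unexpected_keys
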
[INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt']
[CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=1.849815
batch=0 mse_vec=61.8348 mse_chk=292.1768 mse_trj=251.0094 tau_l2=51.5933 sem_align=0.1307
batch=20 mse_vec=113.4773 mse_chk=182.1005 mse_trj=159.5740 tau_l2=51.5991 sem_align=0.1307
batch=40 mse_vec=49.3401 mse_chk=236.0208 mse_trj=211.5081 tau_l2=51.5975 sem_align=0.1307
batch=60 mse_vec=50.5029 mse_chk=214.0788 mse_trj=187.4922 tau_l2=51.5993 sem_align=0.1307
batch=80 mse_vec=108.2934 mse_chk=168.4180 mse_trj=150.3440 tau_l2=51.5997 sem_align=0.1307
batch=100 mse_vec=45.8751 mse_chk=208.8927 mse_trj=188.5907 tau_l2=51.5998 sem_align=0.1307
batch=120 mse_vec=71.4663 mse_chk=299.9240 mse_trj=250.6835 tau_l2=51.5942 sem_align=0.1307
batch=140 mse_vec=149.4102 mse_chk=246.7899 mse_trj=207.3502 tau_l2=51.5906 sem_align=0.1306
batch=160 mse_vec=69.1126 mse_chk=293.3152 mse_trj=253.5936 tau_l2=51.5934 sem_align=0.1307
batch=180 mse_vec=33.8928 mse_chk=163.4598 mse_trj=149.5725 tau_l2=51.6026 sem_align=0.1307
[DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 79.03209517673893, 'avg_mse_chunk': 203.0505365192561, 'avg_mse_traj': 174.71073359283952, 'avg_tau_l2': 51.59980976252266, 'avg_semantic_text_alignment': 0.13066553295646582, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 79.09452783946014, 'avg_hard_mse_chunk': 203.10529347442161, 'avg_hard_mse_traj': 174.7455033424979, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723}
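
Before the batch loop, [CHECK-B] probes whether the telepathy heads actually change the policy output: here the mean absolute difference in the predicted action vector is about 1.85, whereas the control run below reports exactly 0.0, confirming the heads are genuinely bypassed under --disable_telepathy. A plausible form of that probe is sketched below; the policy attribute and method names are hypothetical, not the eval script's API.

# Hypothetical [CHECK-B]-style probe: run one batch with the telepathy heads
# enabled and disabled, and compare the predicted action vectors.
# `telepathy_enabled` and `predict_action_vector` are assumed names.
import torch

@torch.no_grad()
def telepathy_effect(policy, batch) -> float:
    policy.telepathy_enabled = True
    act_on = policy.predict_action_vector(batch)
    policy.telepathy_enabled = False
    act_off = policy.predict_action_vector(batch)
    return (act_on - act_off).abs().mean().item()
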
(sigma_vla) root@C.28229820:/workspace$ cd /workspace
python eval_sigma_vla_rollout.py \
--base_model_id "lerobot/pi05_base" \
--tokenizer_id "google/paligemma-3b-pt-224" \
--artifacts_repo_id "Veltraxor/Sigma" \
--output_dir "/workspace/storage/sigma_eval_out_control_full" \
--batch_size 4 \
--num_workers 2 \
--dtype bf16 \
--disable_telepathy
/venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`.
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
warnings.warn(
Fetching 6 files: 100%|████████████████████████████████████████| 6/6 [00:00<00:00, 5260.41it/s]
[INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace
[INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt
/venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
warnings.warn(
WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so')
WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
[policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback):
No module named 'triton.ops'
The PI05 model is a direct port of the OpenPI implementation.
This implementation follows the original OpenPI structure for compatibility.
Original implementation: https://github.com/Physical-Intelligence/openpi
WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'.
WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'.
/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead.
warnings.warn(
WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py)
Loading model from: lerobot/pi05_base
✓ Loaded state dict from model.safetensors
WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias
WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight
Remapped: action_in_proj.bias -> model.action_in_proj.bias
Remapped: action_in_proj.weight -> model.action_in_proj.weight
Remapped: action_out_proj.bias -> model.action_out_proj.bias
Remapped: action_out_proj.weight -> model.action_out_proj.weight
Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight
Remapped 812 state dict keys
[patch_pi05] Could not tie embed_tokens to lm_head: 'PaliGemmaForConditionalGeneration' object has no attribute 'model'
Warning: Could not remap state dict keys: 'PaliGemmaForConditionalGeneration' object has no attribute 'model'
/venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
warnings.warn(
[CHECK-A] disable_telepathy=True
[CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB
[CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970
[CHECK-A] heads fully matched (no missing/unexpected).
[INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt']
[CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000
batch=0 mse_vec=120.3824 mse_chk=329.0229 mse_trj=273.8570 tau_l2=51.5933 sem_align=0.1307
batch=20 mse_vec=118.0599 mse_chk=199.8560 mse_trj=170.4612 tau_l2=51.5991 sem_align=0.1307
batch=40 mse_vec=104.9314 mse_chk=267.9070 mse_trj=232.3701 tau_l2=51.5975 sem_align=0.1307
batch=60 mse_vec=88.2938 mse_chk=240.8827 mse_trj=204.7720 tau_l2=51.5993 sem_align=0.1307
batch=80 mse_vec=111.9469 mse_chk=184.6580 mse_trj=160.4071 tau_l2=51.5997 sem_align=0.1307
batch=100 mse_vec=92.7777 mse_chk=237.7548 mse_trj=207.4138 tau_l2=51.5998 sem_align=0.1307
batch=120 mse_vec=124.7804 mse_chk=337.7808 mse_trj=274.7607 tau_l2=51.5942 sem_align=0.1307
batch=140 mse_vec=166.1155 mse_chk=275.2057 mse_trj=227.3145 tau_l2=51.5906 sem_align=0.1306
batch=160 mse_vec=130.3184 mse_chk=330.5708 mse_trj=276.9996 tau_l2=51.5934 sem_align=0.1307
batch=180 mse_vec=64.9382 mse_chk=188.2768 mse_trj=165.8391 tau_l2=51.6026 sem_align=0.1307
[DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.83019052157745, 'avg_mse_chunk': 228.9720206076269, 'avg_mse_traj': 191.032333921991, 'avg_tau_l2': 51.59980976252266, 'avg_semantic_text_alignment': 0.13066553295646582, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87706757647038, 'avg_hard_mse_chunk': 229.02830963345815, 'avg_hard_mse_traj': 191.0671798051805, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} |
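
Comparing the two [DONE] reports, the telepathy run beats the control on every action-error metric: avg_mse_vector 79.03 vs 98.83 (roughly 20% lower), avg_mse_chunk 203.05 vs 228.97 (about 11% lower), and avg_mse_traj 174.71 vs 191.03 (about 9% lower), while avg_tau_l2 and avg_semantic_text_alignment are identical in both runs, consistent with those quantities being computed independently of whether the heads feed the action pathway. A small sketch for pulling the two reports and computing the relative change is below, assuming each run wrote its [DONE] dict to <output_dir>/report.json (the filename is an assumption).

# Sketch: compare the telepathy and control evaluation reports.
# The report filename under each --output_dir is assumed to be report.json.
import json

def load_report(path: str) -> dict:
    with open(path) as f:
        return json.load(f)

tele = load_report("/workspace/storage/sigma_eval_out_telepathy_full/report.json")
ctrl = load_report("/workspace/storage/sigma_eval_out_control_full/report.json")

for key in ("avg_mse_vector", "avg_mse_chunk", "avg_mse_traj"):
    rel = (ctrl[key] - tele[key]) / ctrl[key]
    print(f"{key}: telepathy={tele[key]:.2f} control={ctrl[key]:.2f} "
          f"({rel:.1%} lower with telepathy)")
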