(sigma_vla) root@C.28229820:/workspace$ python /workspace/train_sigma_telepathy_vla_lora.py \
--base_model_id "lerobot/pi05_base" \
--data_dir "/workspace/storage/sigma_pickplace" \
--output_dir "/workspace/storage/sigma_lora_out" \
--torch_dtype bf16 \
--load_in_4bit \
--batch_size 2 \
--grad_accum 8 \
--epochs 3 \
--hard_mining_ratio 0.3 \
--hard_mining_lambda 1.0
[WARN] Base model lerobot/pi05_base is not a standard Transformers CausalLM. Telepathy training will run without LoRA on the base model. Error=ValueError('Unrecognized model in lerobot/pi05_base. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: albert, align, altclip, audio-spectrogram-transformer, autoformer, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, blenderbot, blenderbot-small, blip, blip-2, bloom, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, conditional_detr, convbert, convnext, convnextv2, cpmant, ctrl, cvt, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deformable_detr, deit, depth_anything, deta, detr, dinat, dinov2, distilbert, donut-swin, dpr, dpt, efficientformer, efficientnet, electra, encodec, encoder-decoder, ernie, ernie_m, esm, falcon, fastspeech2_conformer, flaubert, flava, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, git, glpn, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gptj, gptsan-japanese, graphormer, grounding-dino, groupvit, hiera, hubert, ibert, idefics, idefics2, imagegpt, informer, instructblip, instructblipvideo, jamba, jetmoe, jukebox, kosmos-2, layoutlm, layoutlmv2, layoutlmv3, led, levit, lilt, llama, llava, llava-next-video, llava_next, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, mgp-str, mistral, mixtral, mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, oneformer, open-llama, openai-gpt, opt, owlv2, owlvit, paligemma, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, persimmon, phi, phi3, pix2struct, plbart, poolformer, pop2piano, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_moe, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rwkv, sam, seamless_m4t, seamless_m4t_v2, segformer, seggpt, sew, sew-d, siglip, siglip_vision_model, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, splinter, squeezebert, stablelm, starcoder2, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, table-transformer, tapas, time_series_transformer, timesformer, timm_backbone, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vits, vivit, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xglm, xlm, xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xmod, yolos, yoso, zoedepth')
/venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
warnings.warn(
storage/sigma_pickplace/shard_00000.pt: 100%|████████████████████████████████████████| 15.1G/15.1G [00:09<00:00, 1.62GB/s]
storage/sigma_pickplace/shard_00002.pt: 100%|████████████████████████████████████████| 12.4G/12.4G [00:10<00:00, 1.18GB/s]
storage/sigma_pickplace/shard_00001.pt: 100%|████████████████████████████████████████| 15.1G/15.1G [00:11<00:00, 1.29GB/s]
Fetching 3 files: 100%|████████████████████████████████████████| 3/3 [00:12<00:00, 4.02s/it]
[INFO] Loaded HF shards from Veltraxor/Sigma/storage/sigma_pickplace
epoch=0 step=0 gstep=0 loss=1086.9083 L_act=1086.8982 L_sem=0.0693 L_int=0.0323 L_tau=0.1718 L_act_hard=618.7102 w_sem=0.100 w_int=0.100 w_tau=0.000 hard_ratio=0.30 tau_rms=0.048737
epoch=0 step=10 gstep=10 loss=1914.0575 L_act=1914.0359 L_sem=0.0693 L_int=0.0346 L_tau=0.1711 L_act_hard=1141.1694 w_sem=0.210 w_int=0.185 w_tau=0.004 hard_ratio=0.30 tau_rms=0.048624
epoch=0 step=20 gstep=20 loss=1308.3302 L_act=1308.3080 L_sem=0.0693 L_int=-0.0042 L_tau=0.1691 L_act_hard=716.4774 w_sem=0.320 w_int=0.271 w_tau=0.007 hard_ratio=0.30 tau_rms=0.043972
epoch=0 step=30 gstep=30 loss=1033.4258 L_act=1033.4686 L_sem=0.0693 L_int=-0.2089 L_tau=0.1557 L_act_hard=573.1190 w_sem=0.429 w_int=0.356 w_tau=0.011 hard_ratio=0.30 tau_rms=0.054172
epoch=0 step=40 gstep=40 loss=1838.2953 L_act=1838.5004 L_sem=0.0734 L_int=-0.5576 L_tau=0.1124 L_act_hard=1100.0006 w_sem=0.539 w_int=0.441 w_tau=0.015 hard_ratio=0.30 tau_rms=0.145681
epoch=0 step=50 gstep=50 loss=1103.2909 L_act=1103.5840 L_sem=0.0693 L_int=-0.6446 L_tau=0.0896 L_act_hard=580.7323 w_sem=0.649 w_int=0.527 w_tau=0.018 hard_ratio=0.30 tau_rms=0.241012
epoch=0 step=60 gstep=60 loss=1426.9183 L_act=1427.2749 L_sem=0.0693 L_int=-0.6710 L_tau=0.0718 L_act_hard=748.6382 w_sem=0.759 w_int=0.612 w_tau=0.022 hard_ratio=0.30 tau_rms=0.380592
epoch=0 step=70 gstep=70 loss=2036.9558 L_act=2037.3759 L_sem=0.0693 L_int=-0.6906 L_tau=0.0582 L_act_hard=1181.5132 w_sem=0.868 w_int=0.698 w_tau=0.026 hard_ratio=0.30 tau_rms=0.565631
epoch=0 step=80 gstep=80 loss=2080.1997 L_act=2080.6604 L_sem=0.1175 L_int=-0.7369 L_tau=0.0453 L_act_hard=1148.7366 w_sem=0.978 w_int=0.783 w_tau=0.029 hard_ratio=0.30 tau_rms=1.075312
epoch=0 step=90 gstep=90 loss=2319.5828 L_act=2320.0923 L_sem=0.0693 L_int=-0.7254 L_tau=0.0488 L_act_hard=1162.6475 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=1.434653
epoch=0 step=100 gstep=100 loss=1975.6973 L_act=1976.1826 L_sem=0.0693 L_int=-0.6956 L_tau=0.0607 L_act_hard=1133.2614 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=1.898025
epoch=0 step=110 gstep=110 loss=1112.9913 L_act=1113.4468 L_sem=0.0693 L_int=-0.6592 L_tau=0.0848 L_act_hard=585.0326 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=2.502877
epoch=0 step=120 gstep=120 loss=1058.9313 L_act=1058.8882 L_sem=0.5253 L_int=-0.6104 L_tau=0.2024 L_act_hard=535.2509 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=4.292017
epoch=0 step=130 gstep=130 loss=1669.4806 L_act=1669.8701 L_sem=0.0693 L_int=-0.5863 L_tau=0.3365 L_act_hard=835.4255 w_sem=1.000 w_int=0.800 w_tau=0.030 hard_ratio=0.30 tau_rms=5.652924
Saved sigma Telepathy heads and (if available) LoRA adapter.
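
The per-step lines above are enough to reconstruct how the total loss is assembled: at every logged step, loss = L_act + w_sem*L_sem + w_int*L_int + w_tau*L_tau to within rounding, with the auxiliary weights ramping linearly from (0.1, 0.1, 0.0) at step 0 to their caps of (1.0, 0.8, 0.03) somewhere around step 80-90. The sketch below reproduces that arithmetic; the warmup length and the function names are inferred from the log, not taken from train_sigma_telepathy_vla_lora.py.

# Hedged reconstruction of the logged loss composition. The warmup length
# (~82 optimizer steps) is a fit to the printed w_sem/w_int/w_tau ramp and
# is an assumption, as are the function names.
def aux_weights(gstep: int, warmup_steps: int = 82):
    frac = min(gstep / warmup_steps, 1.0)
    w_sem = 0.1 + 0.9 * frac   # 0.1 -> 1.0
    w_int = 0.1 + 0.7 * frac   # 0.1 -> 0.8
    w_tau = 0.03 * frac        # 0.0 -> 0.03
    return w_sem, w_int, w_tau

def total_loss(l_act, l_sem, l_int, l_tau, gstep):
    w_sem, w_int, w_tau = aux_weights(gstep)
    return l_act + w_sem * l_sem + w_int * l_int + w_tau * l_tau

# Sanity check against the step-90 line (weights are at their caps there):
# 2320.0923 + 1.0*0.0693 + 0.8*(-0.7254) + 0.03*0.0488 ≈ 2319.583
print(total_loss(2320.0923, 0.0693, -0.7254, 0.0488, gstep=90))
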
(sigma_vla) root@C.28229820:/workspace$ cd /workspace
python eval_sigma_vla_rollout.py \
--base_model_id "lerobot/pi05_base" \
--tokenizer_id "google/paligemma-3b-pt-224" \
--artifacts_repo_id "Veltraxor/Sigma" \
--output_dir "/workspace/storage/sigma_eval_out_telepathy_full" \
--batch_size 4 \
--num_workers 2 \
--dtype bf16
/venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`.
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
warnings.warn(
meta.json: 100%|████████████████████████████████████████| 505/505 [00:00<00:00, 3.39MB/s]
train_args.json: 1.15kB [00:00, 3.67MB/s]
storage/sigma_lora_out/sigma_telepathy_h(…): 100%|████████████████████████████████████████| 589M/589M [00:02<00:00, 207MB/s]
storage/sigma_pickplace/shard_00000.pt: 100%|████████████████████████████████████████| 15.1G/15.1G [00:09<00:00, 1.63GB/s]
storage/sigma_pickplace/shard_00002.pt: 100%|████████████████████████████████████████| 12.4G/12.4G [00:11<00:00, 1.08GB/s]
storage/sigma_pickplace/shard_00001.pt: 100%|████████████████████████████████████████| 15.1G/15.1G [00:12<00:00, 1.20GB/s]
Fetching 6 files: 100%|████████████████████████████████████████| 6/6 [00:12<00:00, 2.16s/it]
[INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace
[INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt
/venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
warnings.warn(
WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so')
WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
[policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback):
No module named 'triton.ops'
The PI05 model is a direct port of the OpenPI implementation.
This implementation follows the original OpenPI structure for compatibility.
Original implementation: https://github.com/Physical-Intelligence/openpi
config.json: 1.90kB [00:00, 8.07MB/s]
WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'.
WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'.
/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead.
warnings.warn(
WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py)
Loading model from: lerobot/pi05_base
model.safetensors: 100%|████████████████████████████████████████| 14.5G/14.5G [00:07<00:00, 1.85GB/s]
✓ Loaded state dict from model.safetensors
WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias
WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight
Remapped: action_in_proj.bias -> model.action_in_proj.bias
Remapped: action_in_proj.weight -> model.action_in_proj.weight
Remapped: action_out_proj.bias -> model.action_out_proj.bias
Remapped: action_out_proj.weight -> model.action_out_proj.weight
Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight
Remapped 812 state dict keys
[patch_pi05] Could not tie embed_tokens to lm_head: 'PaliGemmaForConditionalGeneration' object has no attribute 'model'
Warning: Could not remap state dict keys: 'PaliGemmaForConditionalGeneration' object has no attribute 'model'
tokenizer_config.json: 100%|████████████████████████████████████████| 40.0k/40.0k [00:00<00:00, 127MB/s]
tokenizer.model: 100%|████████████████████████████████████████| 4.26M/4.26M [00:00<00:00, 11.6MB/s]
tokenizer.json: 100%|████████████████████████████████████████| 17.5M/17.5M [00:00<00:00, 43.6MB/s]
added_tokens.json: 100%|████████████████████████████████████████| 24.0/24.0 [00:00<00:00, 157kB/s]
special_tokens_map.json: 100%|████████████████████████████████████████| 607/607 [00:00<00:00, 1.75MB/s]
/venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
warnings.warn(
[CHECK-A] disable_telepathy=False
[CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB
[CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970
[CHECK-A] heads fully matched (no missing/unexpected).
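
The [CHECK-A] block summarizes the telepathy-heads checkpoint before the rollout: number of tensors, global mean/std/RMS of the parameters, and a strict key-matching check against the heads module. A minimal sketch of that kind of verification follows, assuming the checkpoint is a flat state dict saved with torch.save; the helper is illustrative rather than the eval script's actual code.

# Illustrative recomputation of the [CHECK-A] statistics, assuming
# sigma_telepathy_heads.pt is a plain state dict of tensors.
import torch

def summarize_heads(path: str):
    state = torch.load(path, map_location="cpu")
    flat = torch.cat([t.float().flatten() for t in state.values()])
    print(f"heads_tensors={len(state)} mean={flat.mean():.6f} "
          f"std={flat.std():.6f} rms={flat.pow(2).mean().sqrt():.6f}")
    return state

# "heads fully matched (no missing/unexpected)" corresponds to an empty
# result from a non-strict load, e.g.:
# incompat = telepathy_heads.load_state_dict(summarize_heads(path), strict=False)
# assert not incompat.missing_keys and not incompat.unexpected_keys
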
[INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt']
[CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=1.849815
batch=0 mse_vec=61.8348 mse_chk=292.1768 mse_trj=251.0094 tau_l2=51.5933 sem_align=0.1307
batch=20 mse_vec=113.4773 mse_chk=182.1005 mse_trj=159.5740 tau_l2=51.5991 sem_align=0.1307
batch=40 mse_vec=49.3401 mse_chk=236.0208 mse_trj=211.5081 tau_l2=51.5975 sem_align=0.1307
batch=60 mse_vec=50.5029 mse_chk=214.0788 mse_trj=187.4922 tau_l2=51.5993 sem_align=0.1307
batch=80 mse_vec=108.2934 mse_chk=168.4180 mse_trj=150.3440 tau_l2=51.5997 sem_align=0.1307
batch=100 mse_vec=45.8751 mse_chk=208.8927 mse_trj=188.5907 tau_l2=51.5998 sem_align=0.1307
batch=120 mse_vec=71.4663 mse_chk=299.9240 mse_trj=250.6835 tau_l2=51.5942 sem_align=0.1307
batch=140 mse_vec=149.4102 mse_chk=246.7899 mse_trj=207.3502 tau_l2=51.5906 sem_align=0.1306
batch=160 mse_vec=69.1126 mse_chk=293.3152 mse_trj=253.5936 tau_l2=51.5934 sem_align=0.1307
batch=180 mse_vec=33.8928 mse_chk=163.4598 mse_trj=149.5725 tau_l2=51.6026 sem_align=0.1307
[DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 79.03209517673893, 'avg_mse_chunk': 203.0505365192561, 'avg_mse_traj': 174.71073359283952, 'avg_tau_l2': 51.59980976252266, 'avg_semantic_text_alignment': 0.13066553295646582, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 79.09452783946014, 'avg_hard_mse_chunk': 203.10529347442161, 'avg_hard_mse_traj': 174.7455033424979, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723}
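
Before the batch loop, [CHECK-B] probes whether the telepathy heads actually change the policy output: here the mean absolute difference in the predicted action vector is about 1.85, whereas the control run below reports exactly 0.0, confirming the heads are genuinely bypassed under --disable_telepathy. A plausible form of that probe is sketched below; the policy attribute and method names are hypothetical, not the eval script's API.

# Hypothetical [CHECK-B]-style probe: run one batch with the telepathy heads
# enabled and disabled, and compare the predicted action vectors.
# `telepathy_enabled` and `predict_action_vector` are assumed names.
import torch

@torch.no_grad()
def telepathy_effect(policy, batch) -> float:
    policy.telepathy_enabled = True
    act_on = policy.predict_action_vector(batch)
    policy.telepathy_enabled = False
    act_off = policy.predict_action_vector(batch)
    return (act_on - act_off).abs().mean().item()
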
(sigma_vla) root@C.28229820:/workspace$ cd /workspace
python eval_sigma_vla_rollout.py \
--base_model_id "lerobot/pi05_base" \
--tokenizer_id "google/paligemma-3b-pt-224" \
--artifacts_repo_id "Veltraxor/Sigma" \
--output_dir "/workspace/storage/sigma_eval_out_control_full" \
--batch_size 4 \
--num_workers 2 \
--dtype bf16 \
--disable_telepathy
/venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`.
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
warnings.warn(
Fetching 6 files: 100%|████████████████████████████████████████| 6/6 [00:00<00:00, 5260.41it/s]
[INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace
[INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt
/venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
warnings.warn(
WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so')
WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
[policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback):
No module named 'triton.ops'
The PI05 model is a direct port of the OpenPI implementation.
This implementation follows the original OpenPI structure for compatibility.
Original implementation: https://github.com/Physical-Intelligence/openpi
WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'.
WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'.
/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead.
warnings.warn(
WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py)
Loading model from: lerobot/pi05_base
✓ Loaded state dict from model.safetensors
WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias
WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight
Remapped: action_in_proj.bias -> model.action_in_proj.bias
Remapped: action_in_proj.weight -> model.action_in_proj.weight
Remapped: action_out_proj.bias -> model.action_out_proj.bias
Remapped: action_out_proj.weight -> model.action_out_proj.weight
Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight
Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight
Remapped 812 state dict keys
[patch_pi05] Could not tie embed_tokens to lm_head: 'PaliGemmaForConditionalGeneration' object has no attribute 'model'
Warning: Could not remap state dict keys: 'PaliGemmaForConditionalGeneration' object has no attribute 'model'
/venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
warnings.warn(
[CHECK-A] disable_telepathy=True
[CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB
[CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970
[CHECK-A] heads fully matched (no missing/unexpected).
[INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt']
[CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000
batch=0 mse_vec=120.3824 mse_chk=329.0229 mse_trj=273.8570 tau_l2=51.5933 sem_align=0.1307
batch=20 mse_vec=118.0599 mse_chk=199.8560 mse_trj=170.4612 tau_l2=51.5991 sem_align=0.1307
batch=40 mse_vec=104.9314 mse_chk=267.9070 mse_trj=232.3701 tau_l2=51.5975 sem_align=0.1307
batch=60 mse_vec=88.2938 mse_chk=240.8827 mse_trj=204.7720 tau_l2=51.5993 sem_align=0.1307
batch=80 mse_vec=111.9469 mse_chk=184.6580 mse_trj=160.4071 tau_l2=51.5997 sem_align=0.1307
batch=100 mse_vec=92.7777 mse_chk=237.7548 mse_trj=207.4138 tau_l2=51.5998 sem_align=0.1307
batch=120 mse_vec=124.7804 mse_chk=337.7808 mse_trj=274.7607 tau_l2=51.5942 sem_align=0.1307
batch=140 mse_vec=166.1155 mse_chk=275.2057 mse_trj=227.3145 tau_l2=51.5906 sem_align=0.1306
batch=160 mse_vec=130.3184 mse_chk=330.5708 mse_trj=276.9996 tau_l2=51.5934 sem_align=0.1307
batch=180 mse_vec=64.9382 mse_chk=188.2768 mse_trj=165.8391 tau_l2=51.6026 sem_align=0.1307
[DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.83019052157745, 'avg_mse_chunk': 228.9720206076269, 'avg_mse_traj': 191.032333921991, 'avg_tau_l2': 51.59980976252266, 'avg_semantic_text_alignment': 0.13066553295646582, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87706757647038, 'avg_hard_mse_chunk': 229.02830963345815, 'avg_hard_mse_traj': 191.0671798051805, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} |
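
Comparing the two [DONE] reports, the telepathy run beats the control on every action-error metric: avg_mse_vector 79.03 vs 98.83 (roughly 20% lower), avg_mse_chunk 203.05 vs 228.97 (about 11% lower), and avg_mse_traj 174.71 vs 191.03 (about 9% lower), while avg_tau_l2 and avg_semantic_text_alignment are identical in both runs, consistent with those quantities being computed independently of whether the heads feed the action pathway. A small sketch for pulling the two reports and computing the relative change is below, assuming each run wrote its [DONE] dict to <output_dir>/report.json (the filename is an assumption).

# Sketch: compare the telepathy and control evaluation reports.
# The report filename under each --output_dir is assumed to be report.json.
import json

def load_report(path: str) -> dict:
    with open(path) as f:
        return json.load(f)

tele = load_report("/workspace/storage/sigma_eval_out_telepathy_full/report.json")
ctrl = load_report("/workspace/storage/sigma_eval_out_control_full/report.json")

for key in ("avg_mse_vector", "avg_mse_chunk", "avg_mse_traj"):
    rel = (ctrl[key] - tele[key]) / ctrl[key]
    print(f"{key}: telepathy={tele[key]:.2f} control={ctrl[key]:.2f} "
          f"({rel:.1%} lower with telepathy)")
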