| { |
| "_name_or_path": "/home/lambda/claude/idm_output_omx_v2/checkpoint-20000", |
| "action_dim": 32, |
| "action_head_cfg": { |
| "_convert_": "object", |
| "_target_": "gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDM", |
| "config": { |
| "_recursive_": false, |
| "_target_": "gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDMConfig", |
| "action_dim": 32, |
| "action_horizon": 16, |
| "add_pos_embed": true, |
| "add_seperator_token": true, |
| "add_view_embed": true, |
| "backbone_features_projector_cfg": null, |
| "diffusion_model_cfg": { |
| "_target_": "gr00t.model.action_head.cross_attention_dit.DiT", |
| "attention_head_dim": 64, |
| "dropout": 0.2, |
| "final_dropout": true, |
| "interleave_self_attention": true, |
| "norm_type": "ada_norm", |
| "num_attention_heads": 16, |
| "num_layers": 8, |
| "output_dim": 1024, |
| "positional_embeddings": null |
| }, |
| "hidden_size": 1024, |
| "max_action_dim": 32, |
| "max_num_views": 6, |
| "max_state_dim": 64, |
| "mm_projector_cfg": { |
| "_convert_": "object", |
| "_target_": "gr00t.model.action_head.multimodal_projector.MultimodalProjector", |
| "config": { |
| "_target_": "gr00t.model.action_head.multimodal_projector.MultimodalProjectorConfig", |
| "hidden_size": 1024, |
| "mm_hidden_size": 1024, |
| "mm_projector_type": "mlp_doubledownsample" |
| } |
| }, |
| "mm_vision_select_layer": -2, |
| "model_dtype": "float32", |
| "noise_beta_alpha": 1.5, |
| "noise_beta_beta": 1.0, |
| "noise_s": 0.999, |
| "num_inference_timesteps": 16, |
| "num_timestep_buckets": 1000, |
| "siglip_hidden_size": 1024, |
| "siglip_model_cfg": { |
| "_convert_": "object", |
| "_target_": "gr00t.model.action_head.siglip.SiglipModel.from_pretrained", |
| "pretrained_model_name_or_path": "google/siglip2-large-patch16-256" |
| }, |
| "tune_vision_tower": true, |
| "vl_self_attention_cfg": { |
| "_target_": "gr00t.model.action_head.cross_attention_dit.SelfAttentionTransformer", |
| "attention_head_dim": 64, |
| "dropout": 0.2, |
| "final_dropout": true, |
| "num_attention_heads": 16, |
| "num_layers": 4, |
| "positional_embeddings": null |
| } |
| } |
| }, |
| "action_horizon": 16, |
| "architectures": [ |
| "IDM" |
| ], |
| "attn_implementation": null, |
| "backbone_cfg": { |
| "_target_": "gr00t.model.backbone.IdentityBackbone" |
| }, |
| "compute_dtype": "bfloat16", |
| "hidden_size": 0, |
| "model_dtype": "float32", |
| "model_type": "idm", |
| "torch_dtype": "float32", |
| "transformers_version": "4.45.2" |
| } |
|
|