{ "action_horizon": 40, "architectures": [ "AlinVLAv0Memory" ], "backbone_model_type": "contextvla_qwen3_vl_legacy", "backbone_trainable_params_fp32": true, "color_jitter_params": { "brightness": 0.3, "contrast": 0.25, "hue": 0.0, "saturation": 0.05 }, "conversation_image_first": false, "diffusion_model_cfg": { "action_head_max_seq_len": 512, "attention_head_dim": 64, "depth_multi_stream": 4, "depth_single_stream": 8, "dropout": 0.2, "final_dropout": true, "num_attention_heads": 24, "output_dim": 1024, "positional_embeddings": "rope_sa_only", "pre_norm": "layer_norm", "qk_norm": "rms_norm", "rope_theta": 10000.0, "sa_dim": 1536, "set_triple_stream_for_mq": false, "set_triple_stream_for_state": false, "temb_type": "input_token", "use_swiglu": true, "vl_dim": 4096 }, "dtype": "bfloat16", "load_bf16": true, "memory_cfg": { "hidden_size": 4096, "intermediate_size": 16384, "max_position_embeddings": 32, "num_attention_heads": 16, "num_hidden_layers": 2, "num_key_value_heads": 16, "rms_norm_eps": 1e-05, "use_causal_attn": true, "use_rope": true }, "memory_length": 4, "memory_meta_queries_mode": "meta_only", "memory_video_delta_indices": [ -48, -32, -16, 0 ], "model_name": "huiwon/alinvlm_v1_3", "model_type": "AlinVLAv0", "n_meta_queries": 64, "new_embodiment_train_ratio": 0, "qwen3_collator": true, "random_rotation_angle": null, "reproject_vision": false, "state_dropout_prob": 0.3, "training_rtc_max_overlap": 4, "transformers_version": "4.57.0", "tune_diffusion_model": true, "tune_llm": false, "tune_projector": true, "tune_top_llm_layers": 4, "tune_visual": false, "use_memory": true, "use_mmditv0": false, "use_mmditv1": true, "use_relative_action": true, "use_video": true, "video_length": 4 }