{
  "architectures": [
    "ColQwen3Omni"
  ],
  "audio_config": {
    "activation_dropout": 0,
    "activation_function": "gelu",
    "attention_dropout": 0,
    "conv_chunksize": 500,
    "d_model": 1024,
    "downsample_hidden_size": 480,
    "dropout": 0,
    "dtype": "bfloat16",
    "encoder_attention_heads": 16,
    "encoder_ffn_dim": 4096,
    "encoder_layers": 24,
    "initializer_range": 0.02,
    "max_source_positions": 1500,
    "model_type": "bidirlm_omni_audio",
    "n_window": 100,
    "n_window_infer": 400,
    "num_hidden_layers": 24,
    "num_mel_bins": 128,
    "output_dim": 2048,
    "scale_embedding": false
  },
  "audio_end_token_id": 151670,
  "audio_start_token_id": 151669,
  "audio_token_id": 151676,
  "clf_pooling": "late",
  "dtype": "bfloat16",
  "freeze_audio": true,
  "freeze_visual": true,
  "id2label": {
    "0": "LABEL_0"
  },
  "image_token_id": 151655,
  "label2id": {
    "LABEL_0": 0
  },
  "max_image_size": null,
  "max_sequence_length": 1024,
  "model_type": "bidirlm_omni",
  "rope_parameters": {
    "mrope_section": [
      24,
      20,
      20
    ],
    "rope_theta": 5000000.0,
    "rope_type": "default"
  },
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "clf_pooling": "late",
    "dtype": "bfloat16",
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 2048,
    "initializer_range": 0.02,
    "intermediate_size": 6144,
    "is_causal": false,
    "max_position_embeddings": 128000,
    "model_type": "bidirlm_omni_text",
    "num_attention_heads": 16,
    "num_hidden_layers": 28,
    "num_key_value_heads": 8,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "mrope_section": [
        24,
        20,
        20
      ],
      "rope_theta": 5000000.0,
      "rope_type": "default"
    },
    "rope_theta": 5000000.0,
    "tie_word_embeddings": false,
    "vocab_size": 151936
  },
  "text_weights_source": "visual",
  "tie_word_embeddings": true,
  "transformers_version": "5.8.0",
  "trust_remote_code": true,
  "video_token_id": 151656,
  "vision_config": {
    "deepstack_visual_indexes": [
      8,
      16,
      24
    ],
    "depth": 24,
    "dtype": "bfloat16",
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 1024,
    "in_channels": 3,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "model_type": "bidirlm_omni_vision",
    "num_heads": 16,
    "num_position_embeddings": 2304,
    "out_hidden_size": 2048,
    "patch_size": 16,
    "spatial_merge_size": 2,
    "temporal_patch_size": 2
  },
  "vision_end_token_id": 151653,
  "vision_start_token_id": 151652
}