Instructions to use manu/colbidirlm-base with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use manu/colbidirlm-base with Transformers:
```python
# Load model directly
from transformers import ColQwen3Omni

model = ColQwen3Omni.from_pretrained("manu/colbidirlm-base", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
```json
{
  "architectures": [
    "ColQwen3Omni"
  ],
  "audio_config": {
    "activation_dropout": 0,
    "activation_function": "gelu",
    "attention_dropout": 0,
    "conv_chunksize": 500,
    "d_model": 1024,
    "downsample_hidden_size": 480,
    "dropout": 0,
    "dtype": "bfloat16",
    "encoder_attention_heads": 16,
    "encoder_ffn_dim": 4096,
    "encoder_layers": 24,
    "initializer_range": 0.02,
    "max_source_positions": 1500,
    "model_type": "bidirlm_omni_audio",
    "n_window": 100,
    "n_window_infer": 400,
    "num_hidden_layers": 24,
    "num_mel_bins": 128,
    "output_dim": 2048,
    "scale_embedding": false
  },
  "audio_end_token_id": 151670,
  "audio_start_token_id": 151669,
  "audio_token_id": 151676,
  "clf_pooling": "late",
  "dtype": "bfloat16",
  "freeze_audio": true,
  "freeze_visual": true,
  "id2label": {
    "0": "LABEL_0"
  },
  "image_token_id": 151655,
  "label2id": {
    "LABEL_0": 0
  },
  "max_image_size": null,
  "max_sequence_length": 1024,
  "model_type": "bidirlm_omni",
  "rope_parameters": {
    "mrope_section": [
      24,
      20,
      20
    ],
    "rope_theta": 5000000.0,
    "rope_type": "default"
  },
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "clf_pooling": "late",
    "dtype": "bfloat16",
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 2048,
    "initializer_range": 0.02,
    "intermediate_size": 6144,
    "is_causal": false,
    "max_position_embeddings": 128000,
    "model_type": "bidirlm_omni_text",
    "num_attention_heads": 16,
    "num_hidden_layers": 28,
    "num_key_value_heads": 8,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "mrope_section": [
        24,
        20,
        20
      ],
      "rope_theta": 5000000.0,
      "rope_type": "default"
    },
    "rope_theta": 5000000.0,
    "tie_word_embeddings": false,
    "vocab_size": 151936
  },
  "text_weights_source": "visual",
  "tie_word_embeddings": true,
  "transformers_version": "5.8.0",
  "trust_remote_code": true,
  "video_token_id": 151656,
  "vision_config": {
    "deepstack_visual_indexes": [
      8,
      16,
      24
    ],
    "depth": 24,
    "dtype": "bfloat16",
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 1024,
    "in_channels": 3,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "model_type": "bidirlm_omni_vision",
    "num_heads": 16,
    "num_position_embeddings": 2304,
    "out_hidden_size": 2048,
    "patch_size": 16,
    "spatial_merge_size": 2,
    "temporal_patch_size": 2
  },
  "vision_end_token_id": 151653,
  "vision_start_token_id": 151652
}
```