{ "return_dict": true, "output_hidden_states": false, "dtype": "float16", "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "architectures": [ "Ovis" ], "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "problem_type": null, "_name_or_path": "AIDC-AI/Ovis2-4B", "transformers_version": "5.3.0", "auto_map": { "AutoConfig": "configuration_ovis.OvisConfig", "AutoModelForCausalLM": "modeling_ovis.Ovis" }, "model_type": "ovis", "llm_config": { "vocab_size": 151936, "max_position_embeddings": 32768, "hidden_size": 2048, "intermediate_size": 11008, "num_hidden_layers": 36, "num_attention_heads": 16, "use_sliding_window": false, "sliding_window": null, "max_window_layers": 70, "num_key_value_heads": 2, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "use_cache": true, "attention_dropout": 0.0, "layer_types": [ "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention" ], "pad_token_id": null, "bos_token_id": 151643, "eos_token_id": 151645, "tie_word_embeddings": true, "rope_parameters": { "rope_theta": 1000000.0, "rope_type": "default" }, "return_dict": true, "output_hidden_states": false, "dtype": "bfloat16", "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "architectures": [ "Qwen2ForCausalLM" ], "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "problem_type": null, "_name_or_path": "Qwen/Qwen2.5-3B-Instruct", "_attn_implementation_autoset": true, "add_cross_attention": false, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "finetuning_task": null, "is_decoder": false, "prefix": null, "pruned_heads": {}, "sep_token_id": null, "task_specific_params": null, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tokenizer_class": null, "torchscript": false, "use_bfloat16": false, "model_type": "qwen2", "output_attentions": false }, "visual_tokenizer_config": { "return_dict": true, "output_hidden_states": false, "dtype": null, "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "architectures": null, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "problem_type": null, "_name_or_path": "", "_attn_implementation_autoset": true, "add_cross_attention": false, "backbone_kwargs": {}, "bos_token_id": null, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "eos_token_id": null, "finetuning_task": null, "is_decoder": false, "pad_token_id": null, "prefix": null, "pruned_heads": {}, "sep_token_id": null, "task_specific_params": null, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, "torchscript": false, "use_bfloat16": false, "use_indicators": false, "vocab_size": 65536, "tokenize_function": "softmax", "tau": 1.0, "depths": null, "drop_cls_token": false, "backbone_config": { "return_dict": true, "output_hidden_states": false, "dtype": "bfloat16", "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "architectures": [ "AIMv2Model" ], "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "problem_type": null, "_name_or_path": "apple/aimv2-huge-patch14-448", "_attn_implementation_autoset": true, "add_cross_attention": false, "auto_map": { "AutoConfig": "configuration_aimv2.AIMv2Config", "AutoModel": "modeling_aimv2.AIMv2Model", "FlaxAutoModel": "modeling_flax_aimv2.FlaxAIMv2Model" }, "bos_token_id": null, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "eos_token_id": null, "finetuning_task": null, "is_decoder": false, "pad_token_id": null, "prefix": null, "pruned_heads": {}, "sep_token_id": null, "task_specific_params": null, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, "torchscript": false, "use_bfloat16": false, "hidden_size": 1536, "intermediate_size": 4096, "num_hidden_layers": 24, "num_attention_heads": 12, "num_channels": 3, "patch_size": 14, "image_size": 448, "attention_dropout": 0.0, "rms_norm_eps": 1e-05, "projection_dropout": 0.0, "qkv_bias": false, "use_bias": false, "model_type": "aimv2", "output_attentions": false }, "hidden_stride": 2, "model_type": "aimv2_visual_tokenizer", "output_attentions": false }, "multimodal_max_length": 32768, "hidden_size": 2048, "conversation_formatter_class": "QwenConversationFormatter", "llm_attn_implementation": "flash_attention_2", "disable_tie_weight": false, "output_attentions": false, "quantization_config": { "quant_method": "awq", "bits": 4, "group_size": 128, "version": "gemm", "zero_point": true, "modules_to_not_convert": [ "visual_tokenizer", "vte" ] } }