{
  "beit_version": "large",
  "encoder_embed_dim": 1024,
  "out_embed_dim": 1024,
  "image_size": 224,
  "visual_mask_size": 14,
  "loss_names": {
    "itc": 1
  },
  "encoder_layers": 21,
  "beit3_vl_layers": 3,
  "tokenizer_type": "GLMChineseTokenizer",
  "tokenizer": ".",
  "vocab_size": 115244,
  "whole_word_masking": false,
  "precision": 32,
  "test_only": true,
  "flash_attn": false,
  "modelscope": {
    "model_id": "M2Cognition/M2_Encoder_Large"
  },
  "model_file": "m2_encoder_1B.safetensors",
  "model_type": "m2_encoder",
  "architectures": [
    "M2EncoderModel"
  ],
  "processor_class": "M2EncoderProcessor",
  "auto_map": {
    "AutoConfig": "configuration_m2_encoder.M2EncoderConfig",
    "AutoModel": "modeling_m2_encoder.M2EncoderModel",
    "AutoProcessor": "processing_m2_encoder.M2EncoderProcessor"
  }
}