File size: 1,327 Bytes
ca284fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
{
  "model_type": "embernet_vlm",
  "architecture": "BitNet b1.58 MoE VLM",
  "vision_encoder": {
    "model_name": "google/siglip-base-patch16-224",
    "num_image_tokens": 64,
    "freeze_vision": true
  },
  "language_decoder": {
    "vocab_size": 32002,
    "hidden_size": 768,
    "intermediate_size": 2048,
    "num_layers": 16,
    "num_attention_heads": 12,
    "num_kv_heads": 6,
    "max_position_embeddings": 4096,
    "num_experts": 8,
    "num_experts_per_tok": 2,
    "use_shared_expert": true,
    "expert_domains": [
      "vision_ocr",
      "vision_diagram",
      "code_math_chart",
      "code_math_formula",
      "spatial_scene",
      "spatial_reasoning",
      "agentic_knowledge",
      "agentic_reasoning"
    ],
    "quantisation": "BitNet b1.58 (ternary)",
    "activation_bits": 4
  },
  "torch_dtype": "bfloat16",
  "transformers_version": ">=4.36.0",
  "parameter_counts": {
    "vision_encoder": 107748864,
    "vision_encoder_breakdown": {
      "encoder": 92884224,
      "compressor": 2363904,
      "pooler": 2412288,
      "projector": 10088448
    },
    "decoder_total": 733055360,
    "decoder_embeddings": 24577536,
    "decoder_attention": 28360704,
    "decoder_router": 98432,
    "decoder_shared_expert": 75554816,
    "decoder_domain_experts": 604438528,
    "num_domain_experts": 8
  }
}