euhidaman committed on
Commit 8514404 · verified · 1 Parent(s): a1428dc

Add model configuration

Files changed (1)
  1. config.json +110 -0
config.json ADDED
@@ -0,0 +1,110 @@
+{
+  "qwen2_5_config": {
+    "hidden_size": 896,
+    "num_hidden_layers": 24,
+    "num_attention_heads": 14,
+    "num_key_value_heads": 2,
+    "intermediate_size": 4864,
+    "vocab_size": 151936,
+    "max_position_embeddings": 32768,
+    "rope_theta": 1000000.0,
+    "rms_norm_eps": 1e-06,
+    "tie_word_embeddings": true,
+    "use_sliding_window": false,
+    "sliding_window": null,
+    "max_window_layers": 21,
+    "head_dim": 64,
+    "kv_head_dim": 128,
+    "model_type": "qwen2"
+  },
+  "deit_tiny_config": {
+    "image_size": 224,
+    "patch_size": 16,
+    "num_channels": 3,
+    "hidden_size": 192,
+    "num_hidden_layers": 12,
+    "num_attention_heads": 3,
+    "intermediate_size": 768,
+    "hidden_dropout_prob": 0.0,
+    "attention_probs_dropout_prob": 0.0,
+    "layer_norm_eps": 1e-06,
+    "num_patches": 196,
+    "encoder_stride": 16
+  },
+  "quantization_config": {
+    "memory_quantization": {
+      "bits": 1.58,
+      "method": "bitnet_158",
+      "description": "1.58-bit quantization for episodic memory and final model"
+    },
+    "training_quantization": {
+      "weight_bits": 4,
+      "activation_bits": 4,
+      "method": "symmetric",
+      "description": "4-bit quantization for weights and activations during training"
+    },
+    "inference_quantization": {
+      "model_bits": 1.58,
+      "method": "bitnet_158",
+      "description": "1.58-bit quantization for final deployed model"
+    }
+  },
+  "model_dimensions": {
+    "qwen_hidden_dim": 896,
+    "deit_embed_dim": 192,
+    "vision_hidden_size": 192,
+    "language_hidden_size": 896,
+    "num_patches": 196,
+    "k_prefix": 25,
+    "adapter_projection_dim": 896,
+    "alignment_dim": 128,
+    "memory_size": 64,
+    "memory_dim": 896,
+    "memory_target_layers": 6,
+    "memory_num_heads": 4,
+    "memory_num_layers": 6,
+    "head_dim": 64,
+    "scope_hidden_dim": 256,
+    "itm_hidden_dim": 256,
+    "fusion_layers": [
+      9,
+      11
+    ],
+    "num_fusion_heads": 2,
+    "fusion_dim": 384,
+    "w_logvar_setting": 1,
+    "deterministic_memory": false
+  },
+  "itc_config": {
+    "use_itc_queue": true,
+    "itc_queue_size": 256,
+    "itc_embed_dim": 128,
+    "description": "FIBER-style ITC queue for better negative sampling (lightweight version)"
+  },
+  "estimated_sizes": {
+    "qwen_original_mb": 1976,
+    "deit_original_mb": 22.8,
+    "qwen_4bit_mb": 247.0,
+    "deit_4bit_mb": 2.85,
+    "qwen_158bit_mb": 98.80000000000001,
+    "adapter_mb": 0.777728,
+    "memory_original_mb": 0.229376,
+    "memory_158bit_mb": 0.011468800000000001,
+    "wm_projection_mb": 11.010048,
+    "scopenet_mb": 1.180672,
+    "fusion_mb": 0.009216,
+    "alignment_mb": 0.557056,
+    "itm_head_mb": 1.11616,
+    "overhead_total_mb": 14.880256,
+    "total_fp32_mb": 2013.6802559999999,
+    "total_4bit_qwen_mb": 273.28025599999995,
+    "total_fully_quantized_mb": 116.53025600000001
+  },
+  "metadata": {
+    "qwen_model": "Qwen/Qwen2.5-0.5B",
+    "deit_model": "facebook/deit-tiny-patch16-224",
+    "target_total_size_mb": 500,
+    "description": "MicroVLM-V compact model configuration with 4-bit quantization for <1GB deployment",
+    "quantization_default": "4-bit Qwen, FP16 DeiT, FP32 overhead"
+  }
+}
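
For readers wiring this file up: the `qwen2_5_config` and `deit_tiny_config` blocks use standard Hugging Face field names, so they map onto `transformers` config classes almost one-to-one. A minimal sketch, assuming the stock `Qwen2Config` and `DeiTConfig` classes (the repository's own loading code may differ; the custom `kv_head_dim` and derived `num_patches` fields are dropped because the stock classes do not define them):

```python
import json

from transformers import DeiTConfig, Qwen2Config

# Load the combined MicroVLM-V configuration added in this commit.
with open("config.json") as f:
    cfg = json.load(f)

# The qwen2_5_config block uses standard Qwen2 field names, so it can be
# splatted into Qwen2Config directly. "kv_head_dim" is a custom field and
# "model_type" is a class attribute, so both are dropped here (assumption).
qwen_fields = {k: v for k, v in cfg["qwen2_5_config"].items()
               if k not in ("kv_head_dim", "model_type")}
qwen_config = Qwen2Config(**qwen_fields)

# Likewise for the DeiT-Tiny vision tower; "num_patches" is derived
# ((224 / 16) ** 2 == 196) rather than a DeiTConfig argument.
deit_fields = {k: v for k, v in cfg["deit_tiny_config"].items()
               if k != "num_patches"}
deit_config = DeiTConfig(**deit_fields)

print(qwen_config.hidden_size, deit_config.hidden_size)  # 896 192
```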
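The `bitnet_158` method named in `quantization_config` refers to BitNet b1.58-style ternary quantization: each weight is mapped to {-1, 0, +1} (log2(3) ≈ 1.58 bits) using a per-tensor absmean scale. A minimal PyTorch sketch of that published recipe, not this repository's actual quantizer:

```python
import torch

def bitnet_158_quantize(w: torch.Tensor, eps: float = 1e-5):
    """Absmean ternary quantization in the style of BitNet b1.58.

    Each weight is divided by the mean absolute value of the tensor,
    rounded, and clipped to {-1, 0, +1}. Returns the ternary tensor and
    the scale needed to dequantize (w ~ w_q * scale).
    """
    scale = w.abs().mean().clamp(min=eps)   # per-tensor absmean scale
    w_q = (w / scale).round().clamp(-1, 1)  # ternary values
    return w_q, scale

# Hypothetical example: a qwen_hidden_dim x qwen_hidden_dim projection.
w = torch.randn(896, 896)
w_q, scale = bitnet_158_quantize(w)
print(sorted(w_q.unique().tolist()))        # [-1.0, 0.0, 1.0]
print((w - w_q * scale).abs().mean())       # mean quantization error
```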
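The `estimated_sizes` entries are consistent with straightforward bit-width scaling of the FP32 sizes: 4-bit is FP32/8, the 1.58-bit entries appear to use a 1.6/32 = 0.05 factor (a slight over-estimate relative to true 1.58 bits), and the totals match the `quantization_default` mix of 4-bit Qwen, FP16 DeiT, and FP32 overhead. A quick arithmetic check (the 1.6/32 factor is inferred from the numbers, not stated in the file):

```python
qwen_fp32, deit_fp32, overhead_fp32 = 1976, 22.8, 14.880256

print(qwen_fp32 / 8)         # 247.0             -> qwen_4bit_mb
print(deit_fp32 / 8)         # 2.85              -> deit_4bit_mb
print(qwen_fp32 * 1.6 / 32)  # 98.80000000000001 -> qwen_158bit_mb

# Totals: FP32, then 4-bit Qwen + FP16 DeiT + FP32 overhead,
# then 1.58-bit Qwen + 4-bit DeiT + FP32 overhead.
print(qwen_fp32 + deit_fp32 + overhead_fp32)               # ~2013.680256
print(qwen_fp32 / 8 + deit_fp32 / 2 + overhead_fp32)       # ~273.280256
print(qwen_fp32 * 1.6 / 32 + deit_fp32 / 8 + overhead_fp32)  # ~116.530256
```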