Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

.claude/settings.local.json +9 -0
README.md +59 -0
config.json +60 -0
model.safetensors +3 -0
special_tokens_map.json +1 -0
tokenizer.json +0 -0
tokenizer_config.json +176 -0

.claude/settings.local.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "permissions": {
+    "allow": [
+      "Bash(fd:*)",
+      "Bash(ls:*)",
+      "Bash(huggingface-cli upload:*)"
+    ]
+  }
+}

README.md ADDED Viewed

	@@ -0,0 +1,59 @@

+---
+license: apache-2.0
+tags:
+- vision
+- moondream
+- mlx
+- int8
+- quantized
+base_model: moondream/moondream3-preview
+---
+# MD3P-Int8 - INT8 Quantized Moondream3 for MLX
+An INT8 quantized version of Moondream3, offering a balance between model quality and size for MLX deployment.
+## Model Details
+| Component | Original (BF16) | This Model |
+|-----------|-----------------|------------|
+| MoE Experts (layers 4-23) | BF16 | **int8** |
+| Vision Encoder | BF16 | BF16 (preserved) |
+| Text Attention | BF16 | **int8** |
+| Text MLP (layers 0-3) | BF16 | **int8** |
+| Embeddings | BF16 | BF16 (preserved) |
+| **Total Size** | ~12 GB | **~11 GB** (10.99 GB on disk) |
+## Quantization Details
+- **Method**: Affine quantization (bits=8, group_size=64)
+- **Target**: Text model layers (attention, MLP, MoE experts)
+- **Preserved**: Vision encoder and embeddings at BF16 for quality
+## Comparison with INT4 Variants
+| Model | Size | Quality | Use Case |
+|-------|------|---------|----------|
+| md3p-int8 (this) | 10.99 GB | Higher | Desktop/Server MLX |
+| md3p-int4 | 6.48 GB | Medium | Memory-constrained |
+| md3p-int4-smol | 5.43 GB | Lower | iOS (~6GB limit) |
+## Usage
+This model is designed for use with MLX-based Moondream implementations.
+```python
+# Example with mlx-lm or similar
+from mlx_lm import load, generate
+model, tokenizer = load("lewi/md3p-int8")
+```
+## Source & License
+- **Original Model**: [moondream/moondream3-preview](https://huggingface.co/moondream/moondream3-preview)
+- **License**: Apache 2.0 (same as original)
+## Acknowledgments
+Thanks to the [Moondream](https://moondream.ai/) team for the original model and Apache 2.0 license.

config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "architectures": [
+    "HfMoondream"
+  ],
+  "auto_map": {
+    "AutoConfig": "hf_moondream.HfConfig",
+    "AutoModelForCausalLM": "hf_moondream.HfMoondream"
+  },
+  "config": {
+    "skills": [
+      "query",
+      "caption",
+      "detect",
+      "point"
+    ]
+  },
+  "model_type": "moondream3",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.1",
+  "text": {
+    "dim": 2048,
+    "ff_dim": 8192,
+    "n_layers": 24,
+    "vocab_size": 51200,
+    "max_context": 4096,
+    "n_heads": 32,
+    "n_kv_heads": 32,
+    "prefix_attn": 730,
+    "moe": {
+      "num_experts": 64,
+      "start_layer": 4,
+      "experts_per_token": 8,
+      "expert_inner_dim": 1024
+    },
+    "bits": 8,
+    "group_size": 64
+  },
+  "vision": {
+    "enc_dim": 1152,
+    "enc_patch_size": 14,
+    "enc_n_layers": 27,
+    "enc_ff_dim": 4304,
+    "enc_n_heads": 16,
+    "proj_out_dim": 2048,
+    "crop_size": 378,
+    "in_channels": 3,
+    "max_crops": 12,
+    "overlap_margin": 4,
+    "proj_inner_dim": 8192
+  },
+  "region": {
+    "dim": 2048,
+    "coord_feat_dim": 256,
+    "coord_out_dim": 1024,
+    "size_feat_dim": 512,
+    "size_out_dim": 2048,
+    "group_size": null
+  },
+  "dtype": "bfloat16"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b17904a123e4ccbcb4e40dd731530aeaca7be45bc87f9d1dd545ccccd02430f7
+size 10987576136

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


+{}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,176 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|md_reserved_0|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<|md_reserved_1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<|md_reserved_2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<|md_reserved_3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<|md_reserved_4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<|md_reserved_5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<|md_reserved_6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<|md_reserved_7|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<|md_reserved_8|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<|md_reserved_9|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<|md_reserved_10|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<|md_reserved_11|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<|md_reserved_12|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<|md_reserved_13|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<|md_reserved_14|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<|md_reserved_15|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<|md_reserved_16|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<|md_reserved_17|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "<|md_reserved_18|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "<|md_reserved_19|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "tokenizer_class": "PreTrainedTokenizer"
+}