junzzhu committed · Commit b0c0856 · verified · 1 Parent(s): 38f80f4

Add atomllama-33K-5x5-DigitMesh-sparse-q8
README.md CHANGED
@@ -1,3 +1,103 @@
- ---
- license: apache-2.0
- ---
+ ---
+ language:
+ - en
+ tags:
+ - llama
+ - causal-lm
+ - digit-recognition
+ - sparse-model
+ - quantized-model
+ - int8-quantization
+ - qat
+ - model-compression
+ - 50-percent-sparse
+ license: apache-2.0
+ base_model: junzzhu/atomllama-33K-5x5-DigitMesh-sparse
+ library_name: transformers
+ pipeline_tag: text-generation
+ ---
+
+ # AtomLlama-33K-5x5-DigitMesh-Sparse-Q8
+
+ An INT8-quantized version of [atomllama-33K-5x5-DigitMesh-sparse](https://huggingface.co/junzzhu/atomllama-33K-5x5-DigitMesh-sparse) for ultra-efficient 5×5 digit mesh recognition.
+
+ ## Model Description
+
+ This is a **50% sparse + INT8 quantized** variant of the AtomLlama-33K-5x5-DigitMesh model, combining unstructured sparsity with Quantization Aware Training (QAT). This dual compression approach maintains digit recognition accuracy while significantly reducing model size and computational requirements.
+
+ ### Key Features
+
+ - **Base Model**: [junzzhu/atomllama-33K-5x5-DigitMesh-sparse](https://huggingface.co/junzzhu/atomllama-33K-5x5-DigitMesh-sparse)
+ - **Sparsity**: ~50% (unstructured)
+ - **Quantization**: INT8 with Quantization Aware Training (QAT)
+ - **Parameters**: ~33K total, ~16.5K non-zero, 8-bit precision
+ - **Architecture**: LlamaForCausalLM
+ - **Task**: 5×5 binary digit mesh recognition
+ - **Compression**: ~3x smaller on disk than the original FP32 model (see the size arithmetic after this list)
+
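+ The ~3x figure can be sanity-checked against this commit alone. A back-of-the-envelope sketch (the ~33K parameter count is taken from the model name; 45,536 bytes is the `model.safetensors` size recorded below):
+
+ ```python
+ # Rough size arithmetic; the parameter count is approximate.
+ n_params = 33_000          # ~33K parameters, per the model name
+ fp32_bytes = n_params * 4  # dense FP32 baseline: ~132 KB
+ q8_bytes = 45_536          # actual model.safetensors size in this commit
+ print(f"compression: {fp32_bytes / q8_bytes:.1f}x")  # ~2.9x, i.e. "~3x"
+ ```
+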
+ ## Usage
+
+ ### Basic Inference with Transformers
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Load model and tokenizer
+ model_path = "./models/atomllama-33K-5x5-DigitMesh-sparse-q8"
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     dtype="auto",
+     device_map="auto"
+ )
+
+ # Example: classify a 5x5 binary digit pattern (digit "0")
+ pattern = "1 1 1 1 1 1 0 0 0 1 1 0 0 0 1 1 0 0 0 1 1 1 1 1 1"
+ prompt = f"{pattern} <SEP>"
+
+ # Tokenize and generate the single classification token
+ inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
+ inputs.pop("token_type_ids", None)  # drop token_type_ids if the tokenizer emits them
+
+ outputs = model.generate(
+     **inputs,
+     max_new_tokens=1,
+     do_sample=False
+ )
+
+ # Decode only the newly generated token
+ prediction = tokenizer.decode(
+     outputs[0][len(inputs.input_ids[0]):],
+     skip_special_tokens=True
+ ).strip()
+
+ print(f"Predicted digit: {prediction}")  # Expected: "D0"
+ ```
+
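+ Because the vocabulary (see `tokenizer.json` in this commit) maps the class tokens `D0`-`D9` to ids 3-12, the prediction can also be read from a single forward pass instead of `generate`. A minimal sketch, reusing `model`, `tokenizer`, and `inputs` from the snippet above:
+
+ ```python
+ import torch
+
+ # One forward pass; the logits at the last position score the next token.
+ with torch.no_grad():
+     logits = model(**inputs).logits[0, -1]
+
+ # Restrict the argmax to the ten digit-class tokens D0..D9.
+ digit_ids = [tokenizer.convert_tokens_to_ids(f"D{d}") for d in range(10)]
+ best = max(digit_ids, key=lambda i: logits[i].item())
+ print(tokenizer.convert_ids_to_tokens(best))  # Expected: "D0"
+ ```
+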
+ ## Compression Details
+
+ ### Sparsity
+ - **Type**: Unstructured (weights pruned individually based on importance)
+ - **Target Sparsity**: 50%
+ - **Method**: SparseGPT with Hessian-based importance scoring
+
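+ Pruned zeros survive symmetric quantization, so the target can be sanity-checked on a loaded checkpoint. A minimal sketch, reusing `model` from the usage example; how closely the figure matches 50% depends on how the compressed-tensors runtime materializes the packed weights:
+
+ ```python
+ # Fraction of exactly-zero entries across all weight tensors.
+ total = zeros = 0
+ for name, p in model.named_parameters():
+     if "weight" in name:
+         total += p.numel()
+         zeros += (p == 0).sum().item()
+ print(f"overall weight sparsity: {zeros / total:.1%}")  # roughly 50% expected
+ ```
+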
+ ### Quantization
+ - **Precision**: INT8 (8-bit integers)
+ - **Method**: Quantization Aware Training (QAT)
+ - **Framework**: [Axolotl Sparse QAT Integration](https://github.com/junzzhu/axolotl/blob/main/src/axolotl/integrations/sparse_qat/)
+
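+ The exact weight scheme (per-channel, symmetric INT8 via compressed-tensors, with `lm_head` left unquantized) is recorded in the `config.json` and `recipe.yaml` shipped in this commit. A minimal sketch for inspecting it, reusing `model_path` from the usage example:
+
+ ```python
+ from transformers import AutoConfig
+
+ cfg = AutoConfig.from_pretrained(model_path)
+ q = cfg.quantization_config  # mirrors the "quantization_config" block in config.json
+ weights = q["config_groups"]["group_0"]["weights"]
+ print(q["quant_method"], q["ignore"])  # compressed-tensors ['lm_head']
+ print(weights["num_bits"], weights["strategy"], weights["symmetric"])  # 8 channel True
+ ```
+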
+ ## License
+
+ Apache-2.0
+
+ ## Citation
+
+ ```bibtex
+ @misc{atomllama-33k-digitMesh-sparse-q8,
+   title={AtomLlama-33K-5x5-DigitMesh-Sparse-Q8: A 50% Sparse INT8 Quantized Model for Digit Recognition},
+   author={Jun Zhu},
+   year={2026},
+   howpublished={\url{https://huggingface.co/junzzhu/atomllama-33K-5x5-DigitMesh-sparse-q8}}
+ }
+ ```
config.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.3,
+   "bos_token_id": 2,
+   "dtype": "float32",
+   "eos_token_id": 2,
+   "head_dim": 8,
+   "hidden_act": "silu",
+   "hidden_size": 32,
+   "initializer_range": 0.02,
+   "intermediate_size": 128,
+   "max_position_embeddings": 32,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 4,
+   "num_hidden_layers": 2,
+   "num_key_value_heads": 4,
+   "pad_token_id": 2,
+   "pretraining_tp": 1,
+   "quantization_config": {
+     "config_groups": {
+       "group_0": {
+         "format": "pack-quantized",
+         "input_activations": null,
+         "output_activations": null,
+         "targets": [
+           "Linear"
+         ],
+         "weights": {
+           "actorder": null,
+           "block_structure": null,
+           "dynamic": false,
+           "group_size": null,
+           "num_bits": 8,
+           "observer": "minmax",
+           "observer_kwargs": {},
+           "strategy": "channel",
+           "symmetric": true,
+           "type": "int"
+         }
+       }
+     },
+     "format": "pack-quantized",
+     "global_compression_ratio": null,
+     "ignore": [
+       "lm_head"
+     ],
+     "kv_cache_scheme": null,
+     "quant_method": "compressed-tensors",
+     "quantization_status": "compressed",
+     "sparsity_config": {},
+     "transform_config": {},
+     "version": "0.12.2"
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "transformers_version": "4.56.2",
+   "use_cache": false,
+   "vocab_size": 14
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 2,
+   "do_sample": true,
+   "eos_token_id": [
+     2
+   ],
+   "pad_token_id": 2,
+   "transformers_version": "4.56.2"
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62c96c0e254a2645fb6fd5c3230e042e23c138fbda2f2fdf9d00ce3fce3ab43d
+ size 45536
recipe.yaml ADDED
@@ -0,0 +1,6 @@
+ default_stage:
+   default_modifiers:
+     QuantizationModifier:
+       targets: [Linear]
+       ignore: [lm_head]
+       scheme: W8A16
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<SEP>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<SEP>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<SEP>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "version": "1.0",
+   "truncation": null,
+   "padding": null,
+   "added_tokens": [
+     {
+       "id": 2,
+       "content": "<SEP>",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 13,
+       "content": "<PAD>",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     }
+   ],
+   "normalizer": null,
+   "pre_tokenizer": {
+     "type": "WhitespaceSplit"
+   },
+   "post_processor": null,
+   "decoder": null,
+   "model": {
+     "type": "WordLevel",
+     "vocab": {
+       "0": 0,
+       "1": 1,
+       "<SEP>": 2,
+       "D0": 3,
+       "D1": 4,
+       "D2": 5,
+       "D3": 6,
+       "D4": 7,
+       "D5": 8,
+       "D6": 9,
+       "D7": 10,
+       "D8": 11,
+       "D9": 12,
+       "<PAD>": 13
+     },
+     "unk_token": "<unk>"
+   }
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "added_tokens_decoder": {
+     "2": {
+       "content": "<SEP>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "13": {
+       "content": "<PAD>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<SEP>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<SEP>",
+   "extra_special_tokens": {},
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<SEP>",
+   "tokenizer_class": "PreTrainedTokenizerFast"
+ }