junzzhu committed on
Commit
a1fa183
·
verified ·
1 Parent(s): f0f08a0

Add atomllama-33K-5x5-DigitMesh-sparse model

Browse files
README.md CHANGED
@@ -1,3 +1,78 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - llama
6
+ - causal-lm
7
+ - digit-recognition
8
+ - sparse-model
9
+ - sparsegpt
10
+ - model-compression
11
+ - 50-percent-sparse
12
+ license: apache-2.0
13
+ base_model: junzzhu/atomllama-33K-5x5-DigitMesh
14
+ library_name: transformers
15
+ pipeline_tag: text-generation
16
+ ---
17
+
18
+ # AtomLlama-33K-5x5-DigitMesh-Sparse
19
+
20
+ A 50% sparse version of [atomllama-33K-5x5-DigitMesh](https://huggingface.co/junzzhu/atomllama-33K-5x5-DigitMesh) for efficient 5×5 digit mesh recognition.
21
+
22
+ ## Model Description
23
+
24
+ This is a **~50% unstructured sparse** variant of the AtomLlama-33K-5x5-DigitMesh model, pruned using the SparseGPT algorithm. Approximately half of the model weights have been set to zero; SparseGPT's second-order (Hessian-based) updates to the remaining weights are used to preserve digit recognition accuracy.
25
+
26
+ ### Key Features
27
+
28
+ - **Base Model**: [junzzhu/atomllama-33K-5x5-DigitMesh](https://huggingface.co/junzzhu/atomllama-33K-5x5-DigitMesh)
29
+ - **Sparsity**: ~50% (unstructured)
30
+ - **Pruning Method**: SparseGPT with Hessian-based importance scoring
31
+ - **Parameters**: ~33K total, ~16.5K non-zero
32
+ - **Architecture**: LlamaForCausalLM
33
+ - **Task**: 5×5 binary digit mesh recognition
34
+
35
+ ## Usage
36
+
37
+ ### Serving with vLLM
38
+
39
+ ```bash
40
+ python -m vllm.entrypoints.openai.api_server \
41
+ --model ./models/atomllama-33K-5x5-DigitMesh-sparse \
42
+ --max-model-len 32
43
+ ```
44
+
45
+ ### Example Inference
46
+
47
+ ```bash
48
+ curl http://localhost:8000/v1/completions \
49
+ -H 'Content-Type: application/json' \
50
+ -d '{
51
+ "model": "./models/atomllama-33K-5x5-DigitMesh-sparse",
52
+ "prompt": "1 1 1 1 1 1 0 0 0 1 1 0 0 0 1 1 0 0 0 1 1 1 1 1 1 <SEP>",
53
+ "max_tokens": 1,
54
+ "temperature": 0
55
+ }'
56
+ ```
57
+ Expected output: `D0`
58
+
59
+ ## Sparsity Details
60
+
61
+ - **Type**: Unstructured (weights pruned individually based on importance)
62
+ - **Target Sparsity**: 50%
63
+ - **Calibration**: Pruned using digit pattern activations
64
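+ - **Benefits**: Potential memory and inference-speed savings when used with sparse storage formats or sparse tensor libraries (the checkpoint itself stores the weights as dense float32, so zeros still occupy space on disk)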
65
+
66
+ ## License
67
+
68
+ Apache-2.0
69
+
70
+ ## Citation
71
+
72
+ ```bibtex
73
+ @misc{atomllama-33k-digitMesh-sparse,
74
+ title={AtomLlama-33K-5x5-DigitMesh-Sparse: A 50% Sparse Model for Digit Recognition},
75
+ author={Jun Zhu},
76
+ year={2026},
77
+ howpublished={\url{https://huggingface.co/junzzhu/atomllama-33K-5x5-DigitMesh-sparse}}
78
+ }
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.3,
7
+ "bos_token_id": 2,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "head_dim": 8,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 32,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 128,
15
+ "max_position_embeddings": 32,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "num_attention_heads": 4,
19
+ "num_hidden_layers": 2,
20
+ "num_key_value_heads": 4,
21
+ "pad_token_id": 13,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 10000.0,
26
+ "tie_word_embeddings": false,
27
+ "transformers_version": "4.57.3",
28
+ "use_cache": true,
29
+ "vocab_size": 14
30
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 13,
6
+ "transformers_version": "4.57.3"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d19a6d137578604cbe73e5aa3a84729eeec5cfd8b7a86347372d471a0d6bfde
3
+ size 137408
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<SEP>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<SEP>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<PAD>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 2,
8
+ "content": "<SEP>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 13,
17
+ "content": "<PAD>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ }
24
+ ],
25
+ "normalizer": null,
26
+ "pre_tokenizer": {
27
+ "type": "WhitespaceSplit"
28
+ },
29
+ "post_processor": null,
30
+ "decoder": null,
31
+ "model": {
32
+ "type": "WordLevel",
33
+ "vocab": {
34
+ "0": 0,
35
+ "1": 1,
36
+ "<SEP>": 2,
37
+ "D0": 3,
38
+ "D1": 4,
39
+ "D2": 5,
40
+ "D3": 6,
41
+ "D4": 7,
42
+ "D5": 8,
43
+ "D6": 9,
44
+ "D7": 10,
45
+ "D8": 11,
46
+ "D9": 12,
47
+ "<PAD>": 13
48
+ },
49
+ "unk_token": "<unk>"
50
+ }
51
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "2": {
4
+ "content": "<SEP>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "13": {
12
+ "content": "<PAD>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ }
19
+ },
20
+ "bos_token": "<SEP>",
21
+ "clean_up_tokenization_spaces": false,
22
+ "eos_token": "<SEP>",
23
+ "extra_special_tokens": {},
24
+ "model_max_length": 1000000000000000019884624838656,
25
+ "pad_token": "<PAD>",
26
+ "tokenizer_class": "PreTrainedTokenizerFast"
27
+ }