Adding `safetensors` variant of this model
#3
by
SFconvertbot
- opened
- README.md +11 -3
- model-00001-of-00007.safetensors +3 -0
- model-00002-of-00007.safetensors +3 -0
- model-00003-of-00007.safetensors +3 -0
- model-00004-of-00007.safetensors +3 -0
- model-00005-of-00007.safetensors +3 -0
- model-00006-of-00007.safetensors +3 -0
- model-00007-of-00007.safetensors +3 -0
- model.safetensors.index.json +0 -0
- modeling_llama_butler.py +2 -2
README.md
CHANGED
|
@@ -1,9 +1,11 @@
|
|
| 1 |
---
|
| 2 |
-
license: mit
|
| 3 |
-
library_name: transformers
|
| 4 |
base_model:
|
| 5 |
- deepseek-ai/DeepSeek-R1-Distill-Llama-8B
|
|
|
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
| 7 |
# TokenButler
|
| 8 |
<!-- markdownlint-disable first-line-h1 -->
|
| 9 |
<!-- markdownlint-disable html -->
|
|
@@ -17,7 +19,7 @@ base_model:
|
|
| 17 |
<hr>
|
| 18 |
<div align="center" style="line-height: 1;">
|
| 19 |
<!-- Paper Badge -->
|
| 20 |
-
<a href="https://
|
| 21 |
<img alt="Paper"
|
| 22 |
src="https://img.shields.io/badge/Paper-View-orange?logo=readthedocs&logoColor=white"
|
| 23 |
style="display: inline-block; vertical-align: middle;"/>
|
|
@@ -28,6 +30,12 @@ base_model:
|
|
| 28 |
src="https://img.shields.io/badge/GitHub-Repo-black?logo=github&logoColor=white"
|
| 29 |
style="display: inline-block; vertical-align: middle;"/>
|
| 30 |
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
</div>
|
| 32 |
|
| 33 |
<br>
|
|
|
|
| 1 |
---
|
|
|
|
|
|
|
| 2 |
base_model:
|
| 3 |
- deepseek-ai/DeepSeek-R1-Distill-Llama-8B
|
| 4 |
+
library_name: transformers
|
| 5 |
+
license: mit
|
| 6 |
+
pipeline_tag: text-generation
|
| 7 |
---
|
| 8 |
+
|
| 9 |
# TokenButler
|
| 10 |
<!-- markdownlint-disable first-line-h1 -->
|
| 11 |
<!-- markdownlint-disable html -->
|
|
|
|
| 19 |
<hr>
|
| 20 |
<div align="center" style="line-height: 1;">
|
| 21 |
<!-- Paper Badge -->
|
| 22 |
+
<a href="https://arxiv.org/abs/2503.07518" target="_blank" style="margin: 2px;">
|
| 23 |
<img alt="Paper"
|
| 24 |
src="https://img.shields.io/badge/Paper-View-orange?logo=readthedocs&logoColor=white"
|
| 25 |
style="display: inline-block; vertical-align: middle;"/>
|
|
|
|
| 30 |
src="https://img.shields.io/badge/GitHub-Repo-black?logo=github&logoColor=white"
|
| 31 |
style="display: inline-block; vertical-align: middle;"/>
|
| 32 |
</a>
|
| 33 |
+
<!-- Project Page Badge -->
|
| 34 |
+
<a href="https://abdelfattah-lab.github.io/TokenButler/" target="_blank" style="margin: 2px;">
|
| 35 |
+
<img alt="Project Page"
|
| 36 |
+
src="https://img.shields.io/badge/Project%20Page-🌐-lightgrey"
|
| 37 |
+
style="display: inline-block; vertical-align: middle;"/>
|
| 38 |
+
</a>
|
| 39 |
</div>
|
| 40 |
|
| 41 |
<br>
|
model-00001-of-00007.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4a06424d231b4b858b6d735de2be355a9d81b5558c21925bd8eeabad7f62140
|
| 3 |
+
size 4816363256
|
model-00002-of-00007.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fb6c677968f55ccf762344219ea002c980b4eee36bc8004bc9908a90e855759
|
| 3 |
+
size 4999813072
|
model-00003-of-00007.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:707dd72cdaee4b1099afa9416e41437371a901e98a6e4435a30bb65781773fa0
|
| 3 |
+
size 4999813104
|
model-00004-of-00007.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6feb8a06d88592e03352c2cf5d8a6132038ccce44939d88437d39d5a9621dd70
|
| 3 |
+
size 4832007496
|
model-00005-of-00007.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09c3f1f6241553ab4304aebe3c832ef53e6a877e7e29f34eb23151f38530900a
|
| 3 |
+
size 4999813120
|
model-00006-of-00007.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f42f22d1e85d2d9ef439f10fd24ce74ca05e260a84aaea3980678f2b64769f4
|
| 3 |
+
size 4999813128
|
model-00007-of-00007.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eebb1a694f4b88729a06f520d99b489a6778a58d831b7b2dadf51f37c4ae5f80
|
| 3 |
+
size 2806039320
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
modeling_llama_butler.py
CHANGED
|
@@ -918,7 +918,7 @@ class LlamaAttentionExperimental(nn.Module):
|
|
| 918 |
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
|
| 919 |
self.max_position_embeddings = config.max_position_embeddings
|
| 920 |
self.rope_theta = config.rope_theta
|
| 921 |
-
self.inference_mode =
|
| 922 |
self.producer = producer
|
| 923 |
self.layer_idx = layer_idx
|
| 924 |
self.token_sparse_method = None
|
|
@@ -1217,7 +1217,7 @@ class LlamaAttentionExperimental(nn.Module):
|
|
| 1217 |
num_active = (~attention_mask.bool()).sum(dim=-1).expand_as(num_deact) # Number of tokens active at this position if zero-sparsity
|
| 1218 |
effective_sparsity = 100 * (additional_deact.float() / num_active.float()).mean().item()
|
| 1219 |
self.effective_sparsity = effective_sparsity
|
| 1220 |
-
print("Effective Sparsity:", effective_sparsity, "%\t Sequence Length:", q_len)
|
| 1221 |
if self.layer_idx == 0:
|
| 1222 |
if self.effective_sparsity is None:
|
| 1223 |
self.effective_sparsity = 0.0
|
|
|
|
| 918 |
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
|
| 919 |
self.max_position_embeddings = config.max_position_embeddings
|
| 920 |
self.rope_theta = config.rope_theta
|
| 921 |
+
self.inference_mode = True
|
| 922 |
self.producer = producer
|
| 923 |
self.layer_idx = layer_idx
|
| 924 |
self.token_sparse_method = None
|
|
|
|
| 1217 |
num_active = (~attention_mask.bool()).sum(dim=-1).expand_as(num_deact) # Number of tokens active at this position if zero-sparsity
|
| 1218 |
effective_sparsity = 100 * (additional_deact.float() / num_active.float()).mean().item()
|
| 1219 |
self.effective_sparsity = effective_sparsity
|
| 1220 |
+
# print("Effective Sparsity:", effective_sparsity, "%\t Sequence Length:", q_len)
|
| 1221 |
if self.layer_idx == 0:
|
| 1222 |
if self.effective_sparsity is None:
|
| 1223 |
self.effective_sparsity = 0.0
|