Adding `safetensors` variant of this model

#3
by SFconvertbot - opened
README.md CHANGED
@@ -1,9 +1,11 @@
  ---
- license: mit
- library_name: transformers
  base_model:
  - deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+ library_name: transformers
+ license: mit
+ pipeline_tag: text-generation
  ---
+
  # TokenButler
  <!-- markdownlint-disable first-line-h1 -->
  <!-- markdownlint-disable html -->
@@ -17,7 +19,7 @@ base_model:
  <hr>
  <div align="center" style="line-height: 1;">
  <!-- Paper Badge -->
- <a href="https://github.com/abdelfattah-lab/TokenButler/blob/main/TokenButler_Draft.pdf" target="_blank" style="margin: 2px;">
+ <a href="https://arxiv.org/abs/2503.07518" target="_blank" style="margin: 2px;">
  <img alt="Paper"
  src="https://img.shields.io/badge/Paper-View-orange?logo=readthedocs&logoColor=white"
  style="display: inline-block; vertical-align: middle;"/>
@@ -28,6 +30,12 @@ base_model:
  src="https://img.shields.io/badge/GitHub-Repo-black?logo=github&logoColor=white"
  style="display: inline-block; vertical-align: middle;"/>
  </a>
+ <!-- Project Page Badge -->
+ <a href="https://abdelfattah-lab.github.io/TokenButler/" target="_blank" style="margin: 2px;">
+ <img alt="Project Page"
+ src="https://img.shields.io/badge/Project%20Page-🌐-lightgrey"
+ style="display: inline-block; vertical-align: middle;"/>
+ </a>
  </div>
  
  <br>
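With the updated metadata (`library_name: transformers`, `pipeline_tag: text-generation`) and the safetensors weights added below, the checkpoint should be loadable through the standard transformers API. A minimal sketch, assuming a hypothetical repo id and that the custom `modeling_llama_butler.py` shipped in this repo requires `trust_remote_code`:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical repo id for illustration; substitute the actual TokenButler repo.
repo_id = "abdelfattah-lab/TokenButler"

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# trust_remote_code because the repo carries custom modeling code
# (modeling_llama_butler.py, changed further below in this PR).
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

inputs = tokenizer("The capital of France is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```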
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b4a06424d231b4b858b6d735de2be355a9d81b5558c21925bd8eeabad7f62140
+ size 4816363256
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1fb6c677968f55ccf762344219ea002c980b4eee36bc8004bc9908a90e855759
+ size 4999813072
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:707dd72cdaee4b1099afa9416e41437371a901e98a6e4435a30bb65781773fa0
+ size 4999813104
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6feb8a06d88592e03352c2cf5d8a6132038ccce44939d88437d39d5a9621dd70
+ size 4832007496
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:09c3f1f6241553ab4304aebe3c832ef53e6a877e7e29f34eb23151f38530900a
+ size 4999813120
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f42f22d1e85d2d9ef439f10fd24ce74ca05e260a84aaea3980678f2b64769f4
+ size 4999813128
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eebb1a694f4b88729a06f520d99b489a6778a58d831b7b2dadf51f37c4ae5f80
+ size 2806039320
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
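Each shard above is stored through Git LFS: the three-line pointer records the spec version, a sha256 oid, and the byte size. A minimal sketch for checking a downloaded shard against its pointer, using the oid and size of `model-00001-of-00007.safetensors` from this PR (the local path is an assumption):

```python
import hashlib

# Values copied from the LFS pointer for model-00001-of-00007.safetensors above.
EXPECTED_SHA256 = "b4a06424d231b4b858b6d735de2be355a9d81b5558c21925bd8eeabad7f62140"
EXPECTED_SIZE = 4816363256

def verify_shard(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Stream the file in 1 MiB chunks so multi-GB shards never sit in RAM."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_sha256 and size == expected_size

print(verify_shard("model-00001-of-00007.safetensors", EXPECTED_SHA256, EXPECTED_SIZE))
```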
modeling_llama_butler.py CHANGED
@@ -918,7 +918,7 @@ class LlamaAttentionExperimental(nn.Module):
  self.num_key_value_groups = self.num_heads // self.num_key_value_heads
  self.max_position_embeddings = config.max_position_embeddings
  self.rope_theta = config.rope_theta
- self.inference_mode = False
+ self.inference_mode = True
  self.producer = producer
  self.layer_idx = layer_idx
  self.token_sparse_method = None
@@ -1217,7 +1217,7 @@ class LlamaAttentionExperimental(nn.Module):
  num_active = (~attention_mask.bool()).sum(dim=-1).expand_as(num_deact) # Number of tokens active at this position if zero-sparsity
  effective_sparsity = 100 * (additional_deact.float() / num_active.float()).mean().item()
  self.effective_sparsity = effective_sparsity
- print("Effective Sparsity:", effective_sparsity, "%\t Sequence Length:", q_len)
+ # print("Effective Sparsity:", effective_sparsity, "%\t Sequence Length:", q_len)
  if self.layer_idx == 0:
  if self.effective_sparsity is None:
  self.effective_sparsity = 0.0
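The modeling change flips the default `inference_mode` from `False` to `True` and silences the per-forward sparsity print. A hedged sketch for restoring the pre-PR behavior at runtime instead of patching the file; it assumes only what the diff shows (each `LlamaAttentionExperimental` module carries `inference_mode` and `effective_sparsity` attributes) plus a `model` and `tokenizer` loaded as in the earlier sketch:

```python
import torch

# Hypothetical runtime toggle; module and attribute names are taken from the diff above.
for module in model.modules():
    if type(module).__name__ == "LlamaAttentionExperimental":
        module.inference_mode = False  # pre-PR default

# The print was commented out, but effective_sparsity is still stored on the
# module after a forward pass, so it can be read back directly:
inputs = tokenizer("Hello, world", return_tensors="pt")
with torch.no_grad():
    model(**inputs)

attn = next(m for m in model.modules()
            if type(m).__name__ == "LlamaAttentionExperimental")
print("Effective sparsity (%):", attn.effective_sparsity)
```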