Lanni-ni committed
Commit 1ffacc6 · verified · 1 Parent(s): cc86025

Upload AlibiForCausalLM

config.json CHANGED
@@ -1,11 +1,12 @@
 {
+  "_name_or_path": "alibi_4_6_384_",
   "architectures": [
     "AlibiForCausalLM"
   ],
   "attention_bias": false,
   "auto_map": {
-    "AutoConfig": "configuration_transformer.TransformerConfig",
-    "AutoModelForCausalLM": "modeling_transformer.TransformerForCausalLM"
+    "AutoConfig": "configuration_alibi.AlibiConfig",
+    "AutoModelForCausalLM": "modeling_alibi.AlibiForCausalLM"
   },
   "bos_token_id": null,
   "elementwise_affine": true,
@@ -18,7 +19,7 @@
   "initializer_range": 0.02,
   "intermediate_size": null,
   "max_position_embeddings": null,
-  "model_type": "transformer-project_fox",
+  "model_type": "alibi",
   "norm_eps": 1e-06,
   "num_heads": 6,
   "num_hidden_layers": 4,
@@ -26,7 +27,7 @@
   "rope_base": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.44.0",
+  "transformers_version": "4.46.0.dev0",
   "use_alibi": true,
   "use_cache": true,
   "use_rope": false,
configuration_alibi.py ADDED
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+from typing import Optional
+from transformers.configuration_utils import PretrainedConfig
+
+class AlibiConfig(PretrainedConfig):
+    model_type = 'alibi'
+    keys_to_ignore_at_inference = ['past_key_values']
+
+    def __init__(
+        self,
+        vocab_size: int = 32000,
+        hidden_size: int = 2048,
+        hidden_ratio: Optional[int] = 4,
+        intermediate_size: Optional[int] = None,
+        num_hidden_layers: int = 24,
+        num_heads: int = 32,
+        num_kv_heads: Optional[int] = None,
+        hidden_act: str = "swish",
+        window_size: Optional[int] = None,
+        max_position_embeddings: int = 2048,
+        initializer_range: float = 0.02,
+        elementwise_affine: Optional[bool] = True,
+        norm_eps: float = 1e-6,
+        use_cache: bool = True,
+        pad_token_id: Optional[int] = None,
+        bos_token_id: int = 1,
+        eos_token_id: int = 2,
+        tie_word_embeddings: bool = False,
+        attention_bias: bool = False,
+        fuse_norm: bool = True,
+        fuse_cross_entropy: bool = True,
+        rope_base: float = 500000.0,
+        use_rope: bool = False,
+        use_alibi: bool = True,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.hidden_ratio = hidden_ratio
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_heads = num_heads
+        self.num_kv_heads = num_kv_heads
+        self.window_size = window_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_act = hidden_act
+        self.initializer_range = initializer_range
+        self.elementwise_affine = elementwise_affine
+        self.norm_eps = norm_eps
+        self.use_cache = use_cache
+        self.attention_bias = attention_bias
+        self.fuse_cross_entropy = fuse_cross_entropy
+        self.fuse_norm = fuse_norm
+        self.rope_base = rope_base
+        self.use_rope = use_rope
+        self.use_alibi = use_alibi
+
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
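For reference, a small sketch instantiating the new config class directly, using only values visible in the config.json diff above (everything else keeps the defaults defined in __init__); it assumes configuration_alibi.py is importable from the working directory:

from configuration_alibi import AlibiConfig

# Values mirror the committed config.json; remaining fields fall back to the
# class defaults (vocab_size=32000, hidden_act="swish", ...).
config = AlibiConfig(
    num_hidden_layers=4,
    num_heads=6,
    norm_eps=1e-6,
    rope_base=500000.0,
    use_alibi=True,   # ALiBi attention biases in place of RoPE
    use_rope=False,
    attention_bias=False,
    tie_word_embeddings=False,
)
assert config.model_type == "alibi"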
generation_config.json CHANGED
@@ -1,4 +1,4 @@
 {
   "_from_model_config": true,
-  "transformers_version": "4.44.0"
+  "transformers_version": "4.46.0.dev0"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:549a8cb8ab5e54e1036faab7620e1783038719ea894c4ec2d9912b2cd143ed05
+oid sha256:a056cc03312bbb395c0c1ed80061e8dabea55d05697ce382a8b113afc6fc8bfc
 size 182780024