guanwenyu1995 committed on
Commit
3f9e06f
·
verified ·
1 Parent(s): 10ea506

Upload configuration_llama.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. configuration_llama.py +5 -24
configuration_llama.py CHANGED
@@ -28,7 +28,6 @@ class LlamaConfig(PretrainedConfig):
28
  This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
29
  model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
30
  defaults will yield a similar configuration to that of the LLaMA-7B.
31
- e.g. [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf)
32
 
33
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
34
  documentation from [`PretrainedConfig`] for more information.
@@ -51,8 +50,8 @@ class LlamaConfig(PretrainedConfig):
51
  `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
52
  `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
53
  converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
54
- by meanpooling all the original heads within that group. For more details, check out [this
55
- paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
56
  `num_attention_heads`.
57
  hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
58
  The non-linear activation function (function or string) in the decoder.
@@ -106,11 +105,11 @@ class LlamaConfig(PretrainedConfig):
106
  `beta_slow` (`float`, *optional*):
107
  Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
108
  ramp function. If unspecified, it defaults to 1.
109
- `short_factor` (`list[float]`, *optional*):
110
  Only used with 'longrope'. The scaling factor to be applied to short contexts (<
111
  `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
112
  size divided by the number of attention heads divided by 2
113
- `long_factor` (`list[float]`, *optional*):
114
  Only used with 'longrope'. The scaling factor to be applied to long contexts (>
115
  `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
116
  size divided by the number of attention heads divided by 2
@@ -125,7 +124,7 @@ class LlamaConfig(PretrainedConfig):
125
  mlp_bias (`bool`, *optional*, defaults to `False`):
126
  Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.
127
  head_dim (`int`, *optional*):
128
- The attention head dimension. If None, it will default to hidden_size // num_attention_heads
129
 
130
  ```python
131
  >>> from transformers import LlamaModel, LlamaConfig
@@ -142,21 +141,6 @@ class LlamaConfig(PretrainedConfig):
142
 
143
  model_type = "llama"
144
  keys_to_ignore_at_inference = ["past_key_values"]
145
- # Default tensor parallel plan for base model `LlamaModel`
146
- base_model_tp_plan = {
147
- "layers.*.self_attn.q_proj": "colwise",
148
- "layers.*.self_attn.k_proj": "colwise",
149
- "layers.*.self_attn.v_proj": "colwise",
150
- "layers.*.self_attn.o_proj": "rowwise",
151
- "layers.*.mlp.gate_proj": "colwise",
152
- "layers.*.mlp.up_proj": "colwise",
153
- "layers.*.mlp.down_proj": "rowwise",
154
- }
155
- base_model_pp_plan = {
156
- "embed_tokens": (["input_ids"], ["inputs_embeds"]),
157
- "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
158
- "norm": (["hidden_states"], ["hidden_states"]),
159
- }
160
 
161
  def __init__(
162
  self,
@@ -220,6 +204,3 @@ class LlamaConfig(PretrainedConfig):
220
  tie_word_embeddings=tie_word_embeddings,
221
  **kwargs,
222
  )
223
-
224
-
225
- __all__ = ["LlamaConfig"]
 
28
  This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
29
  model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
30
  defaults will yield a similar configuration to that of the LLaMA-7B.
 
31
 
32
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
33
  documentation from [`PretrainedConfig`] for more information.
 
50
  `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
51
  `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
52
  converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
53
+ by meanpooling all the original heads within that group. For more details checkout [this
54
+ paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
55
  `num_attention_heads`.
56
  hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
57
  The non-linear activation function (function or string) in the decoder.
 
105
  `beta_slow` (`float`, *optional*):
106
  Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
107
  ramp function. If unspecified, it defaults to 1.
108
+ `short_factor` (`List[float]`, *optional*):
109
  Only used with 'longrope'. The scaling factor to be applied to short contexts (<
110
  `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
111
  size divided by the number of attention heads divided by 2
112
+ `long_factor` (`List[float]`, *optional*):
113
  Only used with 'longrope'. The scaling factor to be applied to long contexts (>
114
  `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
115
  size divided by the number of attention heads divided by 2
 
124
  mlp_bias (`bool`, *optional*, defaults to `False`):
125
  Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.
126
  head_dim (`int`, *optional*):
127
+ The attention head dimension. If None, it will default to hidden_size // num_heads
128
 
129
  ```python
130
  >>> from transformers import LlamaModel, LlamaConfig
 
141
 
142
  model_type = "llama"
143
  keys_to_ignore_at_inference = ["past_key_values"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  def __init__(
146
  self,
 
204
  tie_word_embeddings=tie_word_embeddings,
205
  **kwargs,
206
  )