xieli123 committed
Commit f1363d2 · verified · 1 Parent(s): caab146

Upload configuration_step1.py with huggingface_hub
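The commit message indicates the file was pushed programmatically rather than through the web UI. A minimal sketch of such an upload, assuming an authenticated environment and a target repo id (neither is recorded on this page):

from huggingface_hub import upload_file

# Sketch only: repo_id below is hypothetical; this page does not name the repo.
upload_file(
    path_or_fileobj="configuration_step1.py",  # local file to push
    path_in_repo="configuration_step1.py",     # destination path in the repo
    repo_id="xieli123/step1",                  # hypothetical target repo
    commit_message="Upload configuration_step1.py with huggingface_hub",
)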

Files changed (1)
  1. configuration_step1.py +61 -0
configuration_step1.py ADDED
@@ -0,0 +1,61 @@
+ # SPDX-License-Identifier: Apache-2.0
+ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+ """Configuration for Step1 text-only models."""
+
+ from __future__ import annotations
+
+ from transformers.configuration_utils import PretrainedConfig
+
+
+ class Step1Config(PretrainedConfig):
+     model_type = "step1"
+     architectures = ["Step1ForCausalLM"]
+     keys_to_ignore_at_inference = ["past_key_values"]
+
+     def __init__(
+         self,
+         *,
+         hidden_size: int = 3072,
+         intermediate_size: int = 8192,
+         num_attention_heads: int = 48,
+         num_attention_groups: int = 4,
+         num_hidden_layers: int = 32,
+         max_seq_len: int = 32768,
+         vocab_size: int = 74752,
+         rms_norm_eps: float = 1e-5,
+         bos_token_id: int = 1,
+         eos_token_id: int = 3,
+         pad_token_id: int = 0,
+         tie_word_embeddings: bool = True,
+         initializer_range: float = 0.02,
+         **kwargs,
+     ) -> None:
+         self.hidden_size = hidden_size
+         self.intermediate_size = intermediate_size
+         self.num_attention_heads = num_attention_heads
+         self.num_attention_groups = num_attention_groups
+         self.num_hidden_layers = num_hidden_layers
+         self.max_seq_len = max_seq_len
+         # Align with common config key used by scheduling logic.
+         self.max_position_embeddings = kwargs.pop(
+             "max_position_embeddings", max_seq_len
+         )
+         self.vocab_size = vocab_size
+         self.rms_norm_eps = rms_norm_eps
+         # Some downstream components expect num_key_value_heads; alias to groups
+         # so grouped KV attention can be derived even if the checkpoint omits it.
+         self.num_key_value_heads = kwargs.pop(
+             "num_key_value_heads", num_attention_groups
+         )
+         super().__init__(
+             bos_token_id=bos_token_id,
+             eos_token_id=eos_token_id,
+             pad_token_id=pad_token_id,
+             tie_word_embeddings=tie_word_embeddings,
+             initializer_range=initializer_range,
+             **kwargs,
+         )
+
+
+ __all__ = ["Step1Config"]
+
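For readers wiring this config into transformers tooling, a minimal usage sketch (not part of this commit; the AutoConfig registration shown is an assumption about how the class would be exposed):

from transformers import AutoConfig

from configuration_step1 import Step1Config

# Hypothetical registration so model_type "step1" resolves to this class;
# vLLM's own plugin machinery may wire this up differently.
AutoConfig.register("step1", Step1Config)

config = Step1Config()
# max_position_embeddings defaults to max_seq_len when the checkpoint omits it.
assert config.max_position_embeddings == config.max_seq_len == 32768
# 48 query heads over 4 KV groups -> 12 query heads share each KV head.
assert config.num_key_value_heads == config.num_attention_groups == 4
print(config.num_attention_heads // config.num_key_value_heads)  # 12

The kwargs.pop aliasing in __init__ means a checkpoint that only stores num_attention_groups still yields a num_key_value_heads value after a round-trip through config.json, while checkpoints that do store either key override the derived defaults.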