wzd2721802 committed
Commit 4101bf2 · verified · 1 Parent(s): e6dd793

Upload 5 files

added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
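
These are Qwen2-family special tokens (chat markers, FIM tokens, tool-call and vision placeholders) pinned to fixed IDs at the top of the vocabulary. As a rough illustration, assuming the remaining tokenizer files (vocab.json, tokenizer_config.json, the merges.txt added below) are present in a local checkout, the entries surface like this; the path is a placeholder:

# Sketch only, not part of the commit: how added_tokens.json entries are consumed.
# "path/to/checkout" is a placeholder for a local clone of the repo.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/checkout")

# Each token listed in added_tokens.json is registered as a single, unsplittable
# token with exactly the ID fixed in that file.
print(tokenizer.convert_tokens_to_ids("<|im_end|>"))     # 151645
print(tokenizer.convert_tokens_to_ids("<|endoftext|>"))  # 151643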
config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "/root/work/filestorage/LLM/LLaMA-Factory/model/JIUTIAN/LLM_8B/Jiutian-Coder-8B-Instruct-HF",
+   "architectures": [
+     "JiutianForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "auto_map": {
+     "AutoConfig": "configuration_jiutian.JiutianConfig",
+     "AutoModel": "modeling_jiutian.JiutianForCausalLM",
+     "AutoModelForCausalLM": "modeling_jiutian.JiutianForCausalLM"
+   },
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 13312,
+   "max_position_embeddings": 8192,
+   "model_type": "jiutian",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pad_token_id": 151645,
+   "pretraining_tp": 1,
+   "qkv_bias": true,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.43.4",
+   "use_cache": false,
+   "vocab_size": 152064
+ }
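
config.json describes a 32-layer, 4096-wide decoder with 32 attention heads and 8 key-value heads (grouped-query attention), an 8192-token context window, and a 152064-entry vocabulary, and its auto_map routes AutoConfig/AutoModelForCausalLM to the bundled configuration_jiutian.py / modeling_jiutian.py. A minimal loading sketch, assuming modeling_jiutian.py and the weight shards are also available (they are not part of this five-file commit) and using a placeholder path:

# Sketch only: loading needs trust_remote_code=True because auto_map points the
# Auto* classes at custom code shipped with the repo. The path and the presence
# of weights are assumptions, not guaranteed by this commit.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("path/to/checkout", trust_remote_code=True)
print(config.model_type, config.hidden_size, config.num_key_value_heads)  # jiutian 4096 8

model = AutoModelForCausalLM.from_pretrained(
    "path/to/checkout",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16"
    trust_remote_code=True,      # required to import JiutianForCausalLM from modeling_jiutian.py
)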
configuration_jiutian.py ADDED
@@ -0,0 +1,62 @@
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.utils import logging
+ logger = logging.get_logger(__name__)
+
+ CM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+
+
+ class JiutianConfig(PretrainedConfig):
+     model_type = "jiutian"
+     keys_to_ignore_at_inference = ["past_key_values"]
+
+     def __init__(
+         self,
+         vocab_size=152064,
+         hidden_size=8192,
+         intermediate_size=13312,
+         num_hidden_layers=32,
+         num_attention_heads=32,
+         num_key_value_heads=8,
+         hidden_act="silu",
+         max_position_embeddings=8192,
+         initializer_range=0.02,
+         rms_norm_eps=1e-6,
+         use_cache=True,
+         pad_token_id=151645,
+         bos_token_id=None,
+         eos_token_id=151645,
+         pretraining_tp=1,
+         tie_word_embeddings=False,
+         rope_theta=500000,
+         rope_scaling=None,
+         qkv_bias=True,
+         attention_dropout=0.0,
+         **kwargs,
+     ):
+         self.vocab_size = vocab_size
+         self.max_position_embeddings = max_position_embeddings
+         self.hidden_size = hidden_size
+         self.intermediate_size = intermediate_size
+         self.num_hidden_layers = num_hidden_layers
+         self.num_attention_heads = num_attention_heads
+         self.hidden_act = hidden_act
+         self.initializer_range = initializer_range
+         self.rms_norm_eps = rms_norm_eps
+         self.pretraining_tp = pretraining_tp
+         self.use_cache = use_cache
+         self.rope_theta = rope_theta
+         self.rope_scaling = None
+         self.qkv_bias = qkv_bias
+         self.attention_dropout = attention_dropout
+         if num_key_value_heads is None:
+             num_key_value_heads = num_attention_heads
+         self.num_key_value_heads = num_key_value_heads
+
+         super().__init__(
+             pad_token_id=pad_token_id,
+             bos_token_id=bos_token_id,
+             eos_token_id=eos_token_id,
+             tie_word_embeddings=tie_word_embeddings,
+             **kwargs,
+         )
+
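
JiutianConfig is a thin PretrainedConfig subclass with Llama/Qwen2-style fields (grouped-query attention via num_key_value_heads, qkv_bias, rope_theta). Several defaults differ from what config.json ships (hidden_size 8192 vs 4096, rope_theta 500000 vs 10000.0), and note that __init__ stores self.rope_scaling = None regardless of the rope_scaling argument, which only matters if RoPE scaling is ever enabled. A small usage sketch, with overrides chosen to match config.json:

# Sketch only: instantiating the config class directly. The overrides mirror the
# repo's config.json; the output directory name is arbitrary.
from configuration_jiutian import JiutianConfig  # the file added in this commit

cfg = JiutianConfig(hidden_size=4096, rope_theta=10000.0, use_cache=False)
print(cfg.num_attention_heads, cfg.num_key_value_heads)  # 32 8 -> grouped-query attention
cfg.save_pretrained("jiutian-config-demo")               # writes a config.json with these values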
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "eos_token_id": 151645,
+   "pad_token_id": 151645,
+   "transformers_version": "4.43.4"
+ }
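
generation_config.json only pins the end-of-sequence and padding IDs to 151645 (<|im_end|>), so generate() stops and pads correctly without extra arguments. A quick check, again with a placeholder path:

# Sketch only: the generation defaults are picked up automatically by
# from_pretrained, but they can also be inspected on their own.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("path/to/checkout")
print(gen_cfg.eos_token_id, gen_cfg.pad_token_id)  # 151645 151645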
merges.txt ADDED
The diff for this file is too large to render. See raw diff