xiaoyewuz-Ruster committed on
Commit
e6291f2
·
verified ·
1 Parent(s): 7b09cba

Upload TextGenerationPipeline

Browse files
Files changed (3) hide show
  1. config.json +4 -0
  2. tokenizer_config.json +6 -0
  3. zzjrabbit.py +121 -0
config.json CHANGED
@@ -2,6 +2,10 @@
2
  "architectures": [
3
  "ZZJRabbitModelForCausalLM"
4
  ],
 
 
 
 
5
  "hidden_size": 256,
6
  "model_type": "zzjrabbit",
7
  "num_layers": 6,
 
2
  "architectures": [
3
  "ZZJRabbitModelForCausalLM"
4
  ],
5
+ "auto_map": {
6
+ "AutoConfig": "zzjrabbit.ZZJRabbitConfig",
7
+ "AutoModelForCausalLM": "zzjrabbit.ZZJRabbitModelForCausalLM"
8
+ },
9
  "hidden_size": 256,
10
  "model_type": "zzjrabbit",
11
  "num_layers": 6,
tokenizer_config.json CHANGED
@@ -9,6 +9,12 @@
9
  "special": true
10
  }
11
  },
 
 
 
 
 
 
12
  "clean_up_tokenization_spaces": false,
13
  "extra_special_tokens": {},
14
  "mask_token": "0",
 
9
  "special": true
10
  }
11
  },
12
+ "auto_map": {
13
+ "AutoTokenizer": [
14
+ "zzjrabbit.ZZJRabbitTokenizer",
15
+ null
16
+ ]
17
+ },
18
  "clean_up_tokenization_spaces": false,
19
  "extra_special_tokens": {},
20
  "mask_token": "0",
zzjrabbit.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Union
2
+ from transformers import PreTrainedTokenizer, PreTrainedModel, PretrainedConfig, GenerationMixin
3
+ from transformers.modeling_outputs import BaseModelOutput, CausalLMOutput
4
+ from tokenizers.models import BPE
5
+ from tokenizers import Tokenizer
6
+ import torch.nn as nn
7
+ import torch
8
+ import os.path
9
+
10
class ZZJRabbitConfig(PretrainedConfig):
    """Hyper-parameters for the ZZJRabbit causal language model.

    Args:
        num_layers: number of stacked transformer blocks.
        vocab_size: size of the token vocabulary.
        hidden_size: width of the embeddings and hidden states.
    """

    model_type = "zzjrabbit"

    def __init__(self, num_layers: int = 6, vocab_size: int = 10000, hidden_size: int = 256, **kwargs):
        # Record the architecture hyper-parameters first; the PretrainedConfig
        # constructor then consumes the generic HF options from kwargs.
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        super().__init__(**kwargs)
18
+
19
class ZZJRabbitLayer(nn.Module):
    """One transformer-style block: multi-head self-attention followed by a
    two-layer feed-forward network, each wrapped in residual + RMSNorm."""

    def __init__(self, config: ZZJRabbitConfig):
        super().__init__()
        # 8 heads, dropout 0.1, inputs laid out as (batch, seq, hidden).
        self.attn = nn.MultiheadAttention(config.hidden_size, 8, 0.1, batch_first=True)
        self.l1 = nn.Linear(config.hidden_size, config.hidden_size)
        self.l2 = nn.Linear(config.hidden_size, config.hidden_size)
        self.activate = nn.ReLU()
        # NOTE(review): a single RMSNorm module is shared by both residual
        # paths, so the same norm weights are applied twice per block — confirm
        # this is intended rather than two independent norms.
        self.norm = nn.RMSNorm(config.hidden_size)

    def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        key_padding_mask = None
        attn_mask = None
        if self.training:
            # Boolean upper-triangular mask (True strictly above the diagonal),
            # i.e. each position may attend only to itself and earlier tokens.
            attn_mask = torch.gt(torch.triu(torch.ones(q.size(-2), q.size(-2), device=q.device), 1), 0)
        if attention_mask is not None:
            # HF-style attention_mask is 1 for real tokens / 0 for padding;
            # MultiheadAttention's key_padding_mask wants True where IGNORED.
            key_padding_mask = torch.lt(attention_mask, 1)
        attn = self.attn(
            q, k, v,
            key_padding_mask=key_padding_mask,
            attn_mask=attn_mask,
            # NOTE(review): is_causal is only a hint for nn.MultiheadAttention;
            # in eval mode attn_mask is None, so no explicit causal mask is
            # applied during inference — confirm this is intended (it is
            # harmless for last-token greedy decoding but not in general).
            is_causal=True
        )[0]
        # Residual + norm around attention, then around the feed-forward MLP.
        q = self.norm(q + attn)
        o = self.l1(q)
        o = self.activate(o)
        o = self.l2(o)
        return self.norm(q + o)
46
+
47
class ZZJRabbitModel(PreTrainedModel):
    """Bare ZZJRabbit transformer: a token embedding followed by a stack of
    ZZJRabbitLayer blocks. Produces the final hidden states."""

    config_class = ZZJRabbitConfig

    def __init__(self, config: ZZJRabbitConfig, **kwargs):
        super().__init__(config, **kwargs)
        self.config = config
        self.emb = nn.Embedding(config.vocab_size, config.hidden_size)
        self.layers = nn.ModuleList(
            ZZJRabbitLayer(config) for _ in range(config.num_layers)
        )

    def forward(self, input_ids: torch.Tensor, return_dict: Optional[bool] = None, attention_mask: Optional[torch.Tensor] = None, **kwargs):
        """Embed input_ids and run them through every layer.

        Returns a one-element tuple of hidden states, or a BaseModelOutput
        when return_dict is truthy.
        """
        hidden = self.emb(input_ids)
        # Pure self-attention: the running hidden states serve as query,
        # key and value for every block.
        for layer in self.layers:
            hidden = layer(hidden, hidden, hidden, attention_mask)
        if return_dict:
            return BaseModelOutput(hidden)
        return (hidden,)
65
+
66
+
67
class ZZJRabbitModelForCausalLM(PreTrainedModel, GenerationMixin):
    """ZZJRabbit transformer with a linear language-modeling head, usable
    with transformers' ``generate()`` via GenerationMixin."""

    config_class = ZZJRabbitConfig

    def __init__(self, config, **kwargs):
        super().__init__(config, **kwargs)
        self.model = ZZJRabbitModel(config, **kwargs)
        # Projects final hidden states to vocabulary logits.
        self.l = nn.Linear(config.hidden_size, config.vocab_size)

    def forward(self, input_ids: torch.Tensor, return_dict: Optional[bool] = None, labels: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, logits_to_keep: Union[int, torch.Tensor] = 0, **kwargs):
        """Run the base model and the LM head.

        Args:
            input_ids: (batch, seq) token ids.
            return_dict: when truthy, return a CausalLMOutput instead of a tuple.
            labels: optional target ids; when given, a loss is computed.
            attention_mask: optional HF-style padding mask (1 = keep, 0 = pad).
            logits_to_keep: int n keeps only the last n sequence positions
                (0 keeps all, since ``-0 == 0``); a tensor is used directly as
                an index into the sequence dimension.

        Returns:
            ``(loss, logits)`` / ``(logits,)`` tuple, or a CausalLMOutput
            when return_dict is truthy.
        """
        hidden = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
        if isinstance(logits_to_keep, int):
            keep = slice(-logits_to_keep, None)
        else:
            keep = logits_to_keep
        logits = self.l(hidden[:, keep, :])
        loss = None
        if labels is not None:
            # loss_function is provided by PreTrainedModel for causal-LM heads
            # (shifted cross-entropy). Fixed: removed leftover debug print(loss).
            loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)
        if not return_dict:
            return (loss, logits) if loss is not None else (logits,)
        return CausalLMOutput(loss=loss, logits=logits)

    @classmethod
    def can_generate(cls):
        """Advertise generate() support to the transformers generation API."""
        return True

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        # NOTE(review): attention_mask and any cached state are dropped here,
        # so every generation step re-runs the full unmasked sequence — confirm
        # this is acceptable for the intended (small) model sizes.
        return {"input_ids": input_ids}
92
+
93
class ZZJRabbitTokenizer(PreTrainedTokenizer):
    """Thin PreTrainedTokenizer wrapper delegating to a ``tokenizers.Tokenizer``
    loaded from a ``tokenizer.json`` file."""

    vocab_files_names = {"tokenizers_file": "tokenizer.json"}

    def __init__(self, tokenizers_file, **kwargs):
        # Load the fast-tokenizer backend before calling the base constructor,
        # which may query vocab_size / get_vocab during initialisation.
        self.internal = Tokenizer.from_file(tokenizers_file)
        super().__init__(**kwargs)

    def get_vocab(self):
        """Return the full token -> id mapping."""
        vocab = {}
        for idx in range(self.vocab_size):
            vocab[self.internal.id_to_token(idx)] = idx
        return vocab

    def tokenize(self, text, **kwargs):
        """Split *text* into token strings using the backend encoder."""
        return self.internal.encode(text).tokens

    def convert_tokens_to_ids(self, tokens):
        """Map one token string, or a list of them, to vocabulary id(s)."""
        if isinstance(tokens, str):
            return self.internal.token_to_id(tokens)
        return [self.internal.token_to_id(token) for token in tokens]

    def decode(self, tokens, skip_special_tokens=True, **kwargs):
        """Turn ids back into text; accepts tensors from model.generate()."""
        if isinstance(tokens, torch.Tensor):
            tokens = tokens.tolist()
        return self.internal.decode(tokens, skip_special_tokens=skip_special_tokens)

    @property
    def vocab_size(self):
        # Size reported by the backend tokenizer.
        return self.internal.get_vocab_size()

    def save_vocabulary(self, path, *args, **kwargs) -> tuple[str]:
        """Serialise the backend tokenizer to ``<path>/tokenizer.json``."""
        target = os.path.join(path, "tokenizer.json")
        self.internal.save(target)
        return (target,)