xiaoyewuz-Ruster committed on
Commit
10b5bb1
·
verified ·
1 Parent(s): 7c7f2ba

Upload 5 files

Browse files
config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./MSOT-Final",
3
+ "architectures": [
4
+ "MSOTModelForCausalLM"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "trace_v3.MSOTConfig",
8
+ "AutoModelForCausalLM": "trace_v3.MSOTModelForCausalLM"
9
+ },
10
+ "hidden_size": 768,
11
+ "model_type": "msot",
12
+ "torch_dtype": "float32",
13
+ "transformers_version": "4.46.2",
14
+ "vocab_size": 65536
15
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.46.2"
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "pad_token": {
3
+ "content": "\u0000",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ }
9
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "\u0000",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ }
11
+ },
12
+ "auto_map": {
13
+ "AutoTokenizer": [
14
+ "trace_v3.MSOTTokenizer",
15
+ null
16
+ ]
17
+ },
18
+ "clean_up_tokenization_spaces": false,
19
+ "model_max_length": 1000000000000000019884624838656,
20
+ "pad_token": "\u0000",
21
+ "tokenizer_class": "MSOTTokenizer"
22
+ }
trace_v3.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PreTrainedTokenizer, PreTrainedModel, PretrainedConfig, GenerationMixin
2
+ from transformers.modeling_outputs import BaseModelOutput, CausalLMOutput
3
+ import torch.nn as nn
4
+ import torch
5
+
6
class MSOTConfig(PretrainedConfig):
    """Configuration for the MSOT model family.

    Attributes:
        vocab_size: Number of rows in the token embedding table and number
            of output logits of the language-model head.
        hidden_size: Width shared by the embedding and every linear layer.

    Any additional keyword arguments are forwarded unchanged to
    ``PretrainedConfig``.
    """

    model_type = "msot"

    def __init__(self, vocab_size=128, hidden_size=16, **kwargs):
        # Model-specific fields are stored before the base initialiser runs,
        # which then consumes the remaining keyword arguments.
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        super().__init__(**kwargs)
12
+
13
class MSOTModel(PreTrainedModel):
    """Backbone of MSOT: an embedding followed by a product of three projections.

    The forward pass computes ``(l1(h) @ l2(h)^T) @ l3(h)`` where ``h`` is the
    embedded input. No softmax, scaling, or masking is applied to the
    intermediate product.
    """

    config_class = MSOTConfig

    def __init__(self, config, **kwargs):
        super().__init__(config, **kwargs)
        self.config = config
        width = config.hidden_size
        # Layer creation order is kept (emb, l1, l2, l3) so that parameter
        # registration and random initialisation order are unchanged.
        self.emb = nn.Embedding(config.vocab_size, width)
        self.l1 = nn.Linear(width, width)
        self.l2 = nn.Linear(width, width)
        self.l3 = nn.Linear(width, width)

    def forward(self, input_ids, return_dict=None, **kwargs):
        """Run the backbone on a batch of token ids.

        Args:
            input_ids: Integer tensor of token ids
                (assumed shape ``(batch, seq)`` -- TODO confirm with callers).
            return_dict: When truthy, wrap the result in ``BaseModelOutput``;
                otherwise (including ``None``) return a 1-tuple.
            **kwargs: Accepted and ignored.

        Returns:
            ``BaseModelOutput`` whose first field is the mixed hidden states,
            or ``(mixed,)``.
        """
        embedded = self.emb(input_ids)
        left = self.l1(embedded)
        # Swap the last two axes so ``left @ middle`` pairs every position
        # with every other position.
        middle = self.l2(embedded).transpose(-2, -1)
        right = self.l3(embedded)
        mixed = left @ middle @ right
        if return_dict:
            return BaseModelOutput(mixed)
        return (mixed,)
35
+
36
class MSOTModelForCausalLM(PreTrainedModel, GenerationMixin):
    """MSOT backbone with a linear language-model head.

    Wraps ``MSOTModel`` and projects its output to ``config.vocab_size``
    logits per position. Supports ``generate`` through ``GenerationMixin``.
    """

    config_class = MSOTConfig

    def __init__(self, config, **kwargs):
        super().__init__(config, **kwargs)
        self.model = MSOTModel(config, **kwargs)
        self.l = nn.Linear(config.hidden_size, config.vocab_size)

    def forward(self, input_ids, return_dict=None, labels=None, **kwargs):
        """Compute next-token logits and, if labels are given, the LM loss.

        Args:
            input_ids: Integer tensor of token ids
                (assumed shape ``(batch, seq)`` -- TODO confirm with callers).
            return_dict: When truthy, return a ``CausalLMOutput``; otherwise
                (including ``None``) return a tuple.
            labels: Optional tensor of target ids aligned with ``input_ids``.
                The shift by one position is done here, so callers pass the
                unshifted sequence.
            **kwargs: Accepted and ignored.

        Returns:
            ``CausalLMOutput(logits=..., loss=...)`` when ``return_dict`` is
            truthy; otherwise ``(loss, logits)`` if ``labels`` was given, else
            ``(logits,)``.
        """
        hidden = self.model(input_ids)[0]
        logits = self.l(hidden)

        loss = None
        if labels is not None:
            # Position t predicts token t + 1: drop the last logit and the
            # first label, then flatten for cross-entropy.
            shifted_logits = logits[:, :-1, :].contiguous()
            shifted_labels = labels[:, 1:].contiguous()
            loss = nn.functional.cross_entropy(
                shifted_logits.view(-1, self.model.config.vocab_size),
                shifted_labels.view(-1),
            )
            # NOTE: the unconditional debug ``print(loss)`` that used to sit
            # here was removed; log the loss from the training loop instead.

        if return_dict:
            return CausalLMOutput(logits=logits, loss=loss)
        if loss is not None:
            return (loss, logits)
        return (logits,)

    def can_generate(self):
        """Tell the generation utilities that this model supports ``generate``."""
        return True

    def prepare_inputs_for_generation(self, input_ids, attention_mask=None, **kwargs):
        """Build the ``forward`` inputs for one generation step.

        Only ``input_ids`` is forwarded; ``attention_mask`` and any other
        keyword arguments are discarded because ``forward`` does not use them.
        """
        return {"input_ids": input_ids}
59
+
60
+
61
+
62
class MSOTTokenizer(PreTrainedTokenizer):
    """Character-level tokenizer over the Unicode Basic Multilingual Plane.

    Every code point below 65536 is its own token and its id is the code
    point itself. Characters outside that range are mapped to ``"\\u0000"``
    (id 0).
    """

    # Number of code points covered; ids are 0 .. _VOCAB_SIZE - 1.
    _VOCAB_SIZE = 65536
    # Token substituted for characters whose code point is out of range.
    _FALLBACK_TOKEN = "\u0000"

    def get_vocab(self):
        """Return a fresh ``{character: code point}`` mapping for the full vocabulary."""
        return {chr(i): i for i in range(self._VOCAB_SIZE)}

    def _tokenize(self, text):
        """Split ``text`` into single-character tokens.

        Out-of-range characters become the fallback token. The original code
        returned the integer ``0`` here, which is not a token string and made
        ``_convert_token_to_id`` fail with ``TypeError`` in ``ord``.
        """
        return [
            c if ord(c) < self._VOCAB_SIZE else self._FALLBACK_TOKEN
            for c in text
        ]

    def _convert_token_to_id(self, token):
        """Map a single-character token to its code point."""
        return ord(token)

    def _convert_id_to_token(self, id):
        """Map an id back to the character with that code point."""
        return chr(id)

    @property
    def vocab_size(self):
        """Size of the base vocabulary (65536)."""
        return self._VOCAB_SIZE

    def save_vocabulary(self, *args, **kwargs):
        """Write nothing: the vocabulary is implicit, so no files are produced."""
        return ()
81
+
82
def gen128(model, input):
    """Generate a continuation for an ASCII prompt with a byte-level model.

    Args:
        model: Object exposing ``generate(tokens, max_new_tokens=...)`` that
            returns a batch of id sequences.
        input: Prompt text; it is encoded as ASCII, so non-ASCII characters
            raise ``UnicodeEncodeError``.

    Returns:
        The first generated sequence, interpreted as bytes and decoded as
        UTF-8.
    """
    prompt_ids = list(bytes(input, "ascii"))
    batch = torch.tensor([prompt_ids])
    generated = model.generate(batch, max_new_tokens=50)
    first_sequence = list(generated[0])
    return bytes(first_sequence).decode("utf-8")
86
+
87
def gen65536(model, input):
    """Generate a continuation for a prompt with a code-point-level model.

    Args:
        model: Object exposing ``generate(tokens, max_new_tokens=...)`` that
            returns a batch of id sequences.
        input: Prompt text. Characters whose code point is 65536 or above
            are silently dropped before encoding.

    Returns:
        The first generated sequence with every id converted back to its
        character.
    """
    prompt_ids = []
    for ch in input:
        code_point = ord(ch)
        if code_point < 65536:
            prompt_ids.append(code_point)
    batch = torch.tensor([prompt_ids])
    generated = model.generate(batch, max_new_tokens=50)
    first_sequence = list(generated[0])
    return "".join(chr(token_id) for token_id in first_sequence)
91
+
92
if __name__ == "__main__":
    # Register the custom classes with the Auto* factories. The config and
    # tokenizer use the default auto class of register_for_auto_class; the
    # two model classes name theirs explicitly.
    MSOTConfig.register_for_auto_class()
    MSOTTokenizer.register_for_auto_class()
    for model_cls, auto_name in (
        (MSOTModel, "AutoModel"),
        (MSOTModelForCausalLM, "AutoModelForCausalLM"),
    ):
        model_cls.register_for_auto_class(auto_name)