Joey Callanan committed on
Commit e2b7617 · 1 Parent(s): 44c0eb3

adding SCMG

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. SCMG/__pycache__/_version.cpython-310.pyc +0 -0
  2. SCMG/_version.py +2 -0
  3. SCMG/config/__init__.py +0 -0
  4. SCMG/config/__pycache__/__init__.cpython-310.pyc +0 -0
  5. SCMG/config/__pycache__/modelparameters.cpython-310.pyc +0 -0
  6. SCMG/config/__pycache__/varables.cpython-310.pyc +0 -0
  7. SCMG/config/modelparameters.py +21 -0
  8. SCMG/config/varables.py +234 -0
  9. SCMG/models/GPT/__init__.py +0 -0
  10. SCMG/models/GPT/__pycache__/__init__.cpython-310.pyc +0 -0
  11. SCMG/models/GPT/__pycache__/model.cpython-310.pyc +0 -0
  12. SCMG/models/GPT/__pycache__/sampler.cpython-310.pyc +0 -0
  13. SCMG/models/GPT/model.py +197 -0
  14. SCMG/models/GPT/sampler.py +85 -0
  15. SCMG/models/GPT2/__init__.py +0 -0
  16. SCMG/models/GPT2/__pycache__/__init__.cpython-310.pyc +0 -0
  17. SCMG/models/GPT2/__pycache__/model.cpython-310.pyc +0 -0
  18. SCMG/models/GPT2/__pycache__/sampler.cpython-310.pyc +0 -0
  19. SCMG/models/GPT2/model.py +197 -0
  20. SCMG/models/GPT2/sampler.py +85 -0
  21. SCMG/models/LSTM/__init__.py +0 -0
  22. SCMG/models/LSTM/__pycache__/__init__.cpython-310.pyc +0 -0
  23. SCMG/models/LSTM/__pycache__/model.cpython-310.pyc +0 -0
  24. SCMG/models/LSTM/__pycache__/sampler.cpython-310.pyc +0 -0
  25. SCMG/models/LSTM/__pycache__/trainer.cpython-310.pyc +0 -0
  26. SCMG/models/LSTM/model.py +48 -0
  27. SCMG/models/LSTM/sampler.py +20 -0
  28. SCMG/models/LSTM/trainer.py +195 -0
  29. SCMG/models/Reinvent/__init__.py +0 -0
  30. SCMG/models/Reinvent/__pycache__/__init__.cpython-310.pyc +0 -0
  31. SCMG/models/Reinvent/__pycache__/model copy 2.cpython-310.pyc +0 -0
  32. SCMG/models/Reinvent/__pycache__/model copy.cpython-310.pyc +0 -0
  33. SCMG/models/Reinvent/__pycache__/model.cpython-310.pyc +0 -0
  34. SCMG/models/Reinvent/__pycache__/sampler.cpython-310.pyc +0 -0
  35. SCMG/models/Reinvent/model copy 2.py +420 -0
  36. SCMG/models/Reinvent/model copy.py +187 -0
  37. SCMG/models/Reinvent/model.py +278 -0
  38. SCMG/models/Reinvent/sampler.py +85 -0
  39. SCMG/models/Reinvent_Scaffold_Decorator/__init__.py +0 -0
  40. SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/__init__.cpython-310.pyc +0 -0
  41. SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/model copy 2.cpython-310.pyc +0 -0
  42. SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/model copy.cpython-310.pyc +0 -0
  43. SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/sampler.cpython-310.pyc +0 -0
  44. SCMG/models/Reinvent_Scaffold_Decorator/model copy 2.py +420 -0
  45. SCMG/models/Reinvent_Scaffold_Decorator/model copy.py +187 -0
  46. SCMG/models/Reinvent_Scaffold_Decorator/model.py +276 -0
  47. SCMG/models/Reinvent_Scaffold_Decorator/sampler.py +85 -0
  48. SCMG/models/Transformer/__init__.py +1 -0
  49. SCMG/models/Transformer/__pycache__/__init__.cpython-310.pyc +0 -0
  50. SCMG/models/Transformer/__pycache__/model copy 2.cpython-310.pyc +0 -0
SCMG/__pycache__/_version.cpython-310.pyc ADDED
Binary file (280 Bytes).
SCMG/_version.py ADDED
@@ -0,0 +1,2 @@
+ def get_versions():
+     return "0.1.1"
SCMG/config/__init__.py ADDED
File without changes
SCMG/config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (168 Bytes).
SCMG/config/__pycache__/modelparameters.cpython-310.pyc ADDED
Binary file (430 Bytes).
SCMG/config/__pycache__/varables.cpython-310.pyc ADDED
Binary file (6.19 kB).
SCMG/config/modelparameters.py ADDED
@@ -0,0 +1,21 @@
+ # class ModelParameters():
+ #     def __init__(self):
+ #         self.NUM_LAYERS = "num_layers"
+ #         self.NUM_HEADS = "num_heads"
+ #         self.DIM_ATTENTION = "dim_attention"
+ #         self.DIM_FEEDFORWARD = "dim_feedforward"
+ #         self.DIM_LSTM = "dim_lstm"
+ #         self.DIM_EMBEDDING = "dim_embedding"
+ #         self.DIM_OUTPUT = "dim_output"
+ #         self.RATE_DROPOUT = "rate_dropout"
+ #         return
+ #
+
+ NUM_LAYERS = "num_layers"
+ NUM_HEADS = "num_heads"
+ DIM_ATTENTION = "dim_attention"
+ DIM_FEEDFORWARD = "dim_feedforward"
+ DIM_LSTM = "dim_lstm"
+ DIM_EMBEDDING = "dim_embedding"
+ DIM_OUTPUT = "dim_output"
+ RATE_DROPOUT = "rate_dropout"
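A minimal sketch (not part of the commit) of how string constants like these are consumed as dictionary keys; the hyperparameter values below are hypothetical.

    from SCMG.config import modelparameters

    # Hypothetical model configuration keyed by the constants defined above.
    config = {
        modelparameters.NUM_LAYERS: 4,
        modelparameters.NUM_HEADS: 8,
        modelparameters.DIM_ATTENTION: 256,
        modelparameters.DIM_FEEDFORWARD: 1024,
        modelparameters.DIM_EMBEDDING: 256,
        modelparameters.RATE_DROPOUT: 0.1,
    }
    print(config[modelparameters.NUM_HEADS])  # 8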
SCMG/config/varables.py ADDED
@@ -0,0 +1,234 @@
+ import re
+ from rdkit import Chem
+
+ DEFAULT = "default"
+ AUTO = "auto"
+
+ # Variables
+ COLUMN_SMILES = "SMILES"
+ COLUMN_ENCODER = "Encoder"
+ COLUMN_DECODER = "Decoder"
+ COLUMN_TASK_TYPE = "TaskType"
+ COLUMN_ENCODER_SEQUENCE = "EncoderSequence"
+ COLUMN_DECODER_SEQUENCE = "DecoderSequence"
+ COLUMN_BOS_TOKEN = "TokenBOS"
+ COLUMN_CUTS = "Cuts"
+ COLUMN_MIN_TOP_P = "MinTopP"
+ COLUMN_MIN_TOKEN_PROB = "MinTokenProb"
+ COLUMN_TOKEN_EOS_PROB = "TokenEOSProb"
+ COLUMN_MOLNAME = "MolName"
+ COLUMN_MOLINDEX = "MolIndex"
+ COLUMN_MOL_PROB = "MolProb"
+ COLUMN_MOL_PROB_TOPP = "MolProb_TopP"
+
+ # Task
+ TOKEN_BEGIN = "<bos>"
+ TOKEN_END = "<eos>"
+ TOKEN_SEP = "<sep>"
+ TOKEN_CODER_SEP = "<delim>"
+ # TRAIN = "Train"
+ TOKEN_PAD = "<pad>"
+ COLUMN_EXCLUDED_MIN = "ExcludedSize"
+ COLUMN_SIZE_ToRunForNExt = "ExcludedSize"
+ COLUMN_SIZE_EXCLUDED = "ExcludedSize"
+
+ # char_level_molecule_generation
+ COLUMN_task_char_mg = "char_mg"
+ TOKEN_TASK_CHAR_MG = "<char_mg>"
+
+ # char_level_scaffold_constrained_molecule_generation
+ COLUMN_task_char_scmg = "char_scmg"
+ TOKEN_TASK_SCMG_CHAR_RAND = "<scmg_char_rand>"
+ TOKEN_TASK_SCMG_CHAR_CANO = "<scmg_char_cano>"
+ TOKEN_TASK_DG_CHAR_RAND = "<dg_char_rand>"
+ TOKEN_TASK_DG_CHAR_CANO = "<dg_char_cano>"
+ LIST_HEAVY_ATOMS = ['c', 'C', 'O', 'N', 'n', 'F', '[C@H]', 'Cl', '[C@@H]', 'S', '[nH]', 's', 'o', 'Br', '[C@]', '[C@@]', 'P', 'B', '[N+]', '[P@@]', '[P@]', '[S@@]', '[N@+]', '[S@]', '[N@@+]', '[N-]', 'p']
+ COLUMN_EXCLUDE_REASON = "Excluded"
+ COLUMN_STATE = "State"
+
+ # chemical_property_prediction
+ COLUMN_task_chem_pd = "chem_pd"
+ TOKEN_TASK_CHEM_PD = "<chem_pd>"
+
+ # molecule_identification
+ COLUMN_task_mol_id = "mol_id"
+ TOKEN_TASK_MOL_ID = "<mol_id>"
+
+ FILEPATH_MODEL = "filepath_model"
+ FILEPATH_INPUT = "filepath_input"
+ DIRPATH_OUTPUT = "dirpath_output"
+ RANDOM_AUGUMENT = "random_augument"
+ TOP_P = "top_p"
+ TOP_K = "top_k"
+ MIN_MOL_PROB = "minimum_mol_prob"
+ MIN_TOKEN_PROB = "minimum_token_prob"
+ MAX_HEAVY_ATOMS = "maximum_heavy_atoms"
+ TEMPERATURE = "temperature"
+
+ # Data
+ VOCAB = "vocab"
+ SIZE_VOCAB = "size_vocab"
+ FILENAME_VOCAB = "vocab.pt"
+ FILENAME_VOCABSTATE = "vocabstate.pt"
+ FILENAME_DATA_RAW = "data.csv"
+
+ TRAIN = "train"
+ TEST = "test"
+ FILENAME_TRAIN_RAW = "train.pt"
+ FILENAME_TRAIN_EPOCH = lambda x: "train_" + str(x) + ".pt"
+
+ FILENAME_TEST = "test.pt"
+ FILENAME_TEST_RAW = "test.pt"
+ FILENAME_TEST_EPOCH = lambda x: "test_" + str(x) + ".pt"
+ FILEPATH_VOCAB = "filepath_vocab"
+ #
+ # try:
+ #     config.screen_width = os.get_terminal_size()[0]
+ # except:
+ #     config.screen_width = 141
+ MAX_SEQUENCE_LENGTH = "max_sequence_length"
+ COLUMN_INCHIKEY = "InchiKey"
+
+ # Train
+ MODEL_NAME = "model_name"
+ MODEL_TYPE = "model_type"
+ MODEL = "model"
+ TASKS = "tasks"
+ DIRPATH_CHECKPOINT = "dirpath_checkpoint"
+ DIRPATH_DATA = "dirpath_data"
+ SIZE_BATCH = "size_batch"
+ SIZE_BLOCK = "size_block"
+ RATE_LEARNING = "rate_learning"
+ DEVICE = "device"
+ EPOCH = "epoch"
+ EPOCHS = "epochs"
+ NUM_WORKERS = "num_workers"
+ DIRPATH_COMPLETED = "dirpath_completed"
+ DIRPATH_EXCLUDED = "dirpath_excluded"
+ DIRPATH_SBATCH = "dirpath_sbatch"
+
+ # Stats
+ TRAIN_LOSS = "train_loss"
+ TEST_LOSS = "test_loss"
+ TIME_ELAPSED = "time_elapsed"
+ RATE_LEARNING = "rate_learning"
+ TOKENS = "tokens"
+
+ # Model
+ FILENAME_MODEL_INIT = "model_init.pt"
+ FILENAME_MODEL_LATEST = "model.pt"
+ FILENAME_MODEL_TRAINED = lambda x: "model_" + str(x) + ".pt"
+
+ FILENAME_MODELSTATE_INIT = "modelstate_init.pt"
+ FILENAME_MODELSTATE_LATEST = "modelstate.pt"
+ FILENAME_MODELSTATE_TRAINED = lambda x: "modelstate_" + str(x) + ".pt"
+
+ FILENAME_SCHEDULER_INIT = "scheduler_init.pt"
+ FILENAME_SCHEDULER_LATEST = "scheduler.pt"
+ FILENAME_SCHEDULER_TRAINED = lambda x: "scheduler_" + str(x) + ".pt"
+
+ FILENAME_OPTIMIZER_INIT = "optimizer_init.pt"
+ FILENAME_OPTIMIZER_LATEST = "optimizer.pt"
+ FILENAME_OPTIMIZER_TRAINED = lambda x: "optimizer_" + str(x) + ".pt"
+
+ # FILENAME_TRAINLOG_INIT = "train_init.pt"
+ FILENAME_TRAINSTATS_LATEST = "trainstats_latest.csv"
+ FILENAME_TRAINSTATS_TRAINED = lambda x: "trainstats_" + str(x) + ".csv"
+
+ FILENAME_TRAINLOG = "train"
+ FORMAT_TIMESTAMP_FILEHANDLER = "%Y%m%d%H%M%S_%f.log"
+ FORMAT_TIMESTAMP = "%Y/%m/%d %H:%M:%S %f"
+
+ FORMAT_LOG = ""
+ DRY_RUN = "dry_run"
+ LOG_LEVEL = "log_level"
+ TOKENIZER = "tokenizer"
+ RUN_ONE_EPOCH = "run_one_epoch"
+ # # Column names
+ # IS_NOVEL = "IS_NOVAL"
+ # NOVALTY = "Novalty"
+ # # VALIDITY = "Validity"
+ # IS_VALID = "IS_VALID"
+ # IS_NOVAL = "IS_NOVAL"
+ # DIR_SAVE = "dir_save"
+ # MODEL_LATEST = "model.pt"
+ # LOG_TRAIN_LATEST = "train_log.csv"
+ # OPTIMIZER_LATEST = "optimizer.pt"
+ # SCHEDULER_LATEST = "scheduler.pt"
+ # TRAIN_LOSS = "train_loss"
+ # TEST_LOSS = "test_loss"
+ # TIME_ELAPSED = "time_elapsed"
+ # # LR = "lr"
+ # TOKENS = "tokens"
+
+ LOGP = "logP"
+ WEIGHT = "weight"
+ QED = "QED"
+ VALIDITY = "SMILES_VALID"
+ FILENAME_TRAIN_DIST = "train_dist.pt"
+ FILENAME_TEST_DIST = "test_dist.pt"
+ MODEL_PRETRAIN = "model_pretrained.pt"
+
+ PYFILE_SAMPLER = "sampler.py"
+ PYFILE_TRAINER = "trainer.py"
+ PYFILE_DATALOADER = "dataloader.py"
+ # PYFILE_SAMPLER = "sampler.py"
+
+ # Model parameters
+ NUM_LAYERS = "num_layers"
+ NUM_ENCODER_LAYERS = "num_encoder_layers"
+ NUM_DECODER_LAYERS = "num_decoder_layers"
+ NUM_HEADS = "num_heads"
+ DIM_ATTENTION = "dim_attention"
+ DIM_FEEDFORWARD = "dim_feedforward"
+ DIM_LSTM = "dim_lstm"
+ DIM_EMBEDDING = "dim_embedding"
+ DIM_OUTPUT = "dim_output"
+ RATE_DROPOUT = "rate_dropout"
+
+ # Scheduler
+ SIZE_STEP = "size_step"
+ GAMMA = "gamma"
+
+ # From Reinvent-Scaffold-Decorator
+ ATTACHMENT_POINT_TOKEN = "*"
+ ATTACHMENT_POINT_NUM_REGEXP = r"\[{}:(\d+)\]".format(re.escape(ATTACHMENT_POINT_TOKEN))
+ ATTACHMENT_POINT_REGEXP = r"(?:{0}|\[{0}[^\]]*\])".format(re.escape(ATTACHMENT_POINT_TOKEN))
+ ATTACHMENT_POINT_NO_BRACKETS_REGEXP = r"(?<!\[){}".format(re.escape(ATTACHMENT_POINT_TOKEN))
+
+ ATTACHMENT_SEPARATOR_TOKEN = "|"
+
+ SLICE_SMARTS = {
+     "hr": [
+         "[*]!@-[*]"
+     ],
+     "recap": [
+         "[C;$(C=O)]!@-N",       # amides and urea
+         "[C;$(C=O)]!@-O",       # esters
+         "C!@-[N;!$(NC=O)]",     # amines
+         "C!@-[O;!$(NC=O)]",     # ether
+         "[CX3]!@=[CX3]",        # olefin
+         "[N+X4]!@-C",           # quaternary nitrogen
+         "n!@-C",                # aromatic N - aliphatic C
+         "[$([NR][CR]=O)]!@-C",  # lactam nitrogen - aliphatic carbon
+         "c!@-c",                # aromatic C - aromatic C
+         "N!@-[$(S(=O)=O)]"      # sulphonamides
+     ]
+ }
+ SLICE_SMARTS = {name: [Chem.MolFromSmarts(sma) for sma in smarts] for name, smarts in SLICE_SMARTS.items()}
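A minimal sketch (not part of the commit) of what the attachment-point regular expressions defined above match; the decorated SMILES string is hypothetical.

    import re
    from SCMG.config import varables

    decorated = "[*:0]c1ccccc1.[*:1]CC(=O)N"  # hypothetical two-fragment input
    # Numbered attachment points -> ['0', '1']
    print(re.findall(varables.ATTACHMENT_POINT_NUM_REGEXP, decorated))
    # Any attachment-point token, bracketed or bare -> ['[*:0]', '[*:1]']
    print(re.findall(varables.ATTACHMENT_POINT_REGEXP, decorated))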
SCMG/models/GPT/__init__.py ADDED
File without changes
SCMG/models/GPT/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (172 Bytes).
SCMG/models/GPT/__pycache__/model.cpython-310.pyc ADDED
Binary file (7.55 kB).
SCMG/models/GPT/__pycache__/sampler.cpython-310.pyc ADDED
Binary file (3.16 kB).
SCMG/models/GPT/model.py ADDED
@@ -0,0 +1,197 @@
+ import math
+ import logging
+
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+ # logger = logging.getLogger(__name__)
+ from SCMG.config import varables
+ from torch.autograd import Variable
+
+ class PositionalEncoder(nn.Module):
+     def __init__(self, config):
+         super(PositionalEncoder, self).__init__()
+         self.Dropout = nn.Dropout(p=config[varables.RATE_DROPOUT])
+         max_len = config[varables.SIZE_BLOCK]
+         pe = torch.zeros(max_len, config[varables.DIM_ATTENTION])
+         position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
+         div_term = torch.exp(torch.arange(0, config[varables.DIM_ATTENTION], 2).float() * (-math.log(10000.0) / config[varables.DIM_ATTENTION]))
+         pe[:, 0::2] = torch.sin(position * div_term)
+         pe[:, 1::2] = torch.cos(position * div_term)
+         pe = pe.unsqueeze(0)
+         self.register_buffer('pe', pe)
+
+     def forward(self, T):
+         x = self.Dropout(self.pe[:, :T, :])
+         return x
+
+ class Attention(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+         self.Key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.Query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.Value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.Dropout_Attention = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.Dropout_Residue = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.Projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+         self.NumberOfHeads = config[varables.NUM_HEADS]
+         self.DimHead = config[varables.DIM_ATTENTION] // self.NumberOfHeads
+         self.DimAttention = config[varables.DIM_ATTENTION]
+
+     def forward(self, X_1, X_2, mask=None):
+         # Self-attention when X_2 is None; otherwise X_1 supplies keys/values and X_2 supplies queries.
+         if X_2 is None:
+             X_2 = X_1
+         BatchSize, T_Encoder, _ = X_1.size()
+         BatchSize, T_Decoder, _ = X_2.size()
+         K = self.Key(X_1).view(BatchSize, T_Encoder, self.NumberOfHeads, self.DimHead).transpose(1, 2)
+         Q = self.Query(X_2).view(BatchSize, T_Decoder, self.NumberOfHeads, self.DimHead).transpose(1, 2)
+         V = self.Value(X_1).view(BatchSize, T_Encoder, self.NumberOfHeads, self.DimHead).transpose(1, 2)
+         # K, Q, V dimensions after the transpose: (BatchSize, NumberOfHeads, SequenceSize, HeadDimension)
+         ScoreAttention = (Q @ K.transpose(-2, -1)) / math.sqrt(self.DimHead)
+         ScoreAttention = ScoreAttention.masked_fill(mask == 0, -1e9)
+         ScoreAttention = F.softmax(ScoreAttention, dim=-1)
+         ScoreAttention = self.Dropout_Attention(ScoreAttention)
+         Z = ScoreAttention @ V
+         # Z dimension: (BatchSize, NumberOfHeads, T_Decoder, HeadDimension)
+         Z = Z.transpose(1, 2).contiguous().view(BatchSize, T_Decoder, self.DimAttention)
+         Z = self.Dropout_Residue(self.Projection(Z))
+         return Z
+
+ class FeedForward(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         if config[varables.DIM_FEEDFORWARD] == 0:
+             Dim_FeedForward = config[varables.DIM_ATTENTION] * 4
+         else:
+             Dim_FeedForward = config[varables.DIM_FEEDFORWARD]
+         self.Linear1 = nn.Linear(config[varables.DIM_EMBEDDING], Dim_FeedForward)
+         self.GELU = nn.GELU()
+         self.Linear2 = nn.Linear(Dim_FeedForward, config[varables.DIM_EMBEDDING])
+         self.Dropout = nn.Dropout(config[varables.RATE_DROPOUT])
+
+     def forward(self, x):
+         x = self.Linear1(x)
+         x = self.GELU(x)
+         x = self.Dropout(x)
+         x = self.Linear2(x)
+         return x
+
+ class DecoderBlock(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.LayerNorm2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.AttentionMasked = Attention(config)
+         self.AttentionCross = Attention(config)
+         self.FeedForward = FeedForward(config)
+
+     def forward(self, X_Decoder, Mask_Decoder):
+         X_Decoder = self.Dropout1(X_Decoder + self.AttentionMasked(self.LayerNorm1(X_Decoder), None, Mask_Decoder))
+         X_Decoder = self.Dropout2(X_Decoder + self.FeedForward(self.LayerNorm2(X_Decoder)))
+         return X_Decoder
+
+ class Model(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         # Variables
+         self.Dim_Attention = config[varables.DIM_ATTENTION]
+         self.Token_Padding_Decoder = config["Token_Padding_Decoder"]
+         # Embedding and positional encoding layers
+         self.Embedding_Decoder = nn.Embedding(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION])
+         self.pos_emb = PositionalEncoder(config)
+         # Dropout and normalization layers
+         self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         # Transformer layers
+         self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+         # Output layer
+         self.head = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]), bias=False)
+         # Init
+         self.apply(self._init_weights)
+         self.optimizer = None
+         # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+
+     def _init_weights(self, module):
+         for p in module.parameters():
+             if p.dim() > 1:
+                 nn.init.xavier_uniform_(p)
+         # if isinstance(module, (nn.Linear, nn.Embedding)):
+         #     module.weight.data.normal_(mean=0.0, std=0.02)
+         #     if isinstance(module, nn.Linear) and module.bias is not None:
+         #         module.bias.data.zero_()
+         # elif isinstance(module, nn.LayerNorm):
+         #     module.bias.data.zero_()
+         #     module.weight.data.fill_(1.0)
+
+     def init_optimizers(self, train_config):
+         optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+         return optimizer
+
+     def init_scheduler(self, train_config):
+         scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+         return scheduler
+
+     def get_collate_fn(self, vocab_encoder, vocab_decoder):
+         def collate(results):
+             # Pad encoder and decoder id lists to the longest sequence in the batch.
+             X_Encoder = [a[0] for a in results]
+             X_Decoder = [a[1] for a in results]
+             boundary = -1
+             max_len_x = max([len(a) for a in X_Encoder])
+             max_len_y = max([len(a) for a in X_Decoder])
+             x = torch.tensor([(a + [vocab_encoder[varables.TOKEN_PAD]] * (max_len_x - len(a))) for a in X_Encoder], dtype=torch.long)
+             y = torch.tensor([(a + [vocab_decoder[varables.TOKEN_PAD]] * (max_len_y - len(a))) for a in X_Decoder], dtype=torch.long)
+             return x, y, boundary
+         return collate
+
+     def generate_masks(self, X_Decoder):
+         # Combine the padding mask with a causal (lower-triangular) mask.
+         T = X_Decoder.shape[1]
+         Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).unsqueeze(-2).repeat(1, 1, T, 1)
+         mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T).to(Mask_Decoder.device)
+         Mask_Decoder = Mask_Decoder.masked_fill(mask_tril == 0, 0)
+         return Mask_Decoder
+
+     def forward(self, X_Encoder, X_Decoder, Y_Decoder_Ref=None, boundary=None):
+         # X_Encoder is accepted for interface compatibility; this decoder-only model does not use it.
+         Mask_Decoder = self.generate_masks(X_Decoder)
+         # Preprocess: scaled token embedding plus positional encoding
+         X_Decoder = self.Dropout1(self.Embedding_Decoder(X_Decoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Decoder.size(1)))
+         # Decoder blocks
+         for decoder_block in self.decoder_blocks:
+             X_Decoder = decoder_block(X_Decoder, Mask_Decoder)
+         X_Decoder = self.LayerNorm1(X_Decoder)
+         Y_Decoder_Logits = self.head(X_Decoder)
+         loss = None
+         if Y_Decoder_Ref is not None:
+             loss = F.cross_entropy(Y_Decoder_Logits.view(-1, Y_Decoder_Logits.size(-1)), Y_Decoder_Ref.view(-1), ignore_index=self.Token_Padding_Decoder)
+         return Y_Decoder_Logits, loss
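A minimal sketch (not part of the commit) of constructing and running the Model above; the toy vocabulary and hyperparameter values are hypothetical, and the targets simply reuse the inputs to exercise the loss path.

    import torch
    from SCMG.config import varables
    from SCMG.models.GPT.model import Model

    vocab = {"<pad>": 0, "<bos>": 1, "<eos>": 2, "C": 3, "O": 4}  # hypothetical toy vocabulary
    config = {
        varables.NUM_LAYERS: 2,
        varables.NUM_HEADS: 4,
        varables.DIM_ATTENTION: 64,
        varables.DIM_EMBEDDING: 64,   # must equal DIM_ATTENTION for the residual connections
        varables.DIM_FEEDFORWARD: 0,  # 0 makes FeedForward fall back to 4 * DIM_ATTENTION
        varables.RATE_DROPOUT: 0.1,
        varables.SIZE_BLOCK: 16,
        "vocab_decoder": vocab,
        "Token_Padding_Decoder": vocab["<pad>"],
    }
    model = Model(config)
    x = torch.randint(1, len(vocab), (2, 8))        # (batch, sequence) of non-pad token ids
    logits, loss = model(None, x, Y_Decoder_Ref=x)  # X_Encoder is unused by this model
    print(logits.shape)                             # torch.Size([2, 8, 5])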
SCMG/models/GPT/sampler.py ADDED
@@ -0,0 +1,85 @@
+ import random
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+ def set_seed(seed):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+ def top_k_logits(logits, k):
+     # Keep the k largest logits per row; everything else becomes -inf.
+     v, ix = torch.topk(logits, k)
+     out = logits.clone()
+     out[out < v[:, [-1]]] = -float('Inf')
+     return out
+
+ @torch.no_grad()
+ def sample(model, x, steps, temperature=1.0, sample=False, top_k=None):
+     block_size = model.get_block_size()
+     model.eval()
+     for k in range(steps):
+         x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+         logits, _ = model(x_cond)
+         logits = logits[:, -1, :] / temperature
+         if top_k is not None:
+             logits = top_k_logits(logits, top_k)
+         probs = F.softmax(logits, dim=-1)
+         if sample:
+             ix = torch.multinomial(probs, num_samples=1)
+         else:
+             _, ix = torch.topk(probs, k=1, dim=-1)
+         x = torch.cat((x, ix), dim=1)
+     return x
+
+ # Note: this second definition shadows the `sample` above.
+ @torch.no_grad()
+ def sample(model, x, steps, temperature=1.0, boundary=None):
+     block_size = model.get_block_size()
+     model.eval()
+     for k in range(steps):
+         x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+         logits, _ = model(x_cond, boundary=boundary)
+         logits = logits[:, -1, :] / temperature
+         probs = F.softmax(logits, dim=-1)
+         ix = torch.multinomial(probs, num_samples=1)
+         x = torch.cat((x, ix), dim=1)
+     return x
+
+ # Example scaffold-decoration input, left as a bare string literal:
+ 'L_5*C(=O)NCc1cccc(OC)c1.*c1nsc2ccccc12COc1cccc(CNC(=O)c2cccc(NC(=O)c3nsc4ccccc34)c2)c1'
+
+ # for i in range(1,21):
+ # Relies on module-level `vocab`, `inv`, `model`, and `test_valid` being defined elsewhere.
+ def sample_L(i, option='string'):
+     prefix = 'L_' + str(i)
+     string_input = prefix + '*O=C1NN=Cc2c1cccc2.*O=C(C1CC1)N1CCNCC1'
+     array_input = [vocab[a] for a in ['<bos>'] + list(string_input)]
+     boundary = [len(array_input)]
+     tensor_input = torch.tensor(array_input, device='cuda').unsqueeze(0).repeat(32, 1)
+     boundary = boundary * 32
+     tensor_output = sample(model, tensor_input, 250, boundary=boundary)
+     strings_output = []
+     for j in range(tensor_output.shape[0]):
+         list_string_output = [inv[a] for a in tensor_output[j, boundary[j]:].cpu().numpy() if a != vocab['<pad>']]
+         # if list_string_output[0] == '<bos>':
+         #     list_string_output = list_string_output[1:]
+         if list_string_output[-1] == '<eos>':
+             list_string_output = list_string_output[:-1]
+         string_output = ''.join(list_string_output)
+         strings_output.append(string_output)
+         print(string_output)
+     for j in range(tensor_output.shape[0]):
+         if test_valid(strings_output[j]):
+             print(1)
+         else:
+             print(0)
+
+ # logits, _ = model(tensor_input, boundary=boundary)
+
+ # Tokenized form of the example input above:
+ ['<bos>', 'L', '_', '5', '*', 'C', '(', '=', 'O', ')', 'N', 'C', 'c', '1', 'c', 'c', 'c', 'c', '(', 'O', 'C', ')', 'c', '1', '.', '*', 'c', '1', 'n', 's', 'c', '2', 'c', 'c', 'c', 'c', 'c', '1', '2', 'C', 'O', 'c', '1', 'c', 'c', 'c', 'c', '(', 'C', 'N', 'C', '(', '=', 'O', ')', 'c', '2', 'c', 'c', 'c', 'c', '(', 'N', 'C', '(', '=', 'O', ')', 'c', '3', 'n', 's', 'c', '4', 'c', 'c', 'c', 'c', 'c', '3', '4', ')', 'c', '2', ')', 'c', '1', '<eos>']
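A minimal sketch (not part of the commit) of what top_k_logits above does to a row of logits; the numbers are arbitrary.

    import torch
    from SCMG.models.GPT.sampler import top_k_logits

    logits = torch.tensor([[2.0, 1.0, 0.5, -1.0]])
    print(top_k_logits(logits, 2))
    # tensor([[2., 1., -inf, -inf]]) -- softmax now spreads all mass over the top two tokens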
SCMG/models/GPT2/__init__.py ADDED
File without changes
SCMG/models/GPT2/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (173 Bytes).
SCMG/models/GPT2/__pycache__/model.cpython-310.pyc ADDED
Binary file (7.56 kB).
SCMG/models/GPT2/__pycache__/sampler.cpython-310.pyc ADDED
Binary file (3.17 kB).
SCMG/models/GPT2/model.py ADDED
@@ -0,0 +1,197 @@
+ import math
+ import logging
+
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+ # logger = logging.getLogger(__name__)
+ from SCMG.config import varables
+ from torch.autograd import Variable
+
+ class PositionalEncoder(nn.Module):
+     def __init__(self, config):
+         super(PositionalEncoder, self).__init__()
+         self.Dropout = nn.Dropout(p=config[varables.RATE_DROPOUT])
+         max_len = config[varables.SIZE_BLOCK]
+         pe = torch.zeros(max_len, config[varables.DIM_ATTENTION])
+         position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
+         div_term = torch.exp(torch.arange(0, config[varables.DIM_ATTENTION], 2).float() * (-math.log(10000.0) / config[varables.DIM_ATTENTION]))
+         pe[:, 0::2] = torch.sin(position * div_term)
+         pe[:, 1::2] = torch.cos(position * div_term)
+         pe = pe.unsqueeze(0)
+         self.register_buffer('pe', pe)
+
+     def forward(self, T):
+         x = self.Dropout(self.pe[:, :T, :])
+         return x
+
+ class Attention(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+         self.Key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.Query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.Value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.Dropout_Attention = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.Dropout_Residue = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.Projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+         self.NumberOfHeads = config[varables.NUM_HEADS]
+         self.DimHead = config[varables.DIM_ATTENTION] // self.NumberOfHeads
+         self.DimAttention = config[varables.DIM_ATTENTION]
+
+     def forward(self, X_1, X_2, mask=None):
+         # Self-attention when X_2 is None; otherwise X_1 supplies keys/values and X_2 supplies queries.
+         if X_2 is None:
+             X_2 = X_1
+         BatchSize, T_Encoder, _ = X_1.size()
+         BatchSize, T_Decoder, _ = X_2.size()
+         K = self.Key(X_1).view(BatchSize, T_Encoder, self.NumberOfHeads, self.DimHead).transpose(1, 2)
+         Q = self.Query(X_2).view(BatchSize, T_Decoder, self.NumberOfHeads, self.DimHead).transpose(1, 2)
+         V = self.Value(X_1).view(BatchSize, T_Encoder, self.NumberOfHeads, self.DimHead).transpose(1, 2)
+         # K, Q, V dimensions after the transpose: (BatchSize, NumberOfHeads, SequenceSize, HeadDimension)
+         ScoreAttention = (Q @ K.transpose(-2, -1)) / math.sqrt(self.DimHead)
+         ScoreAttention = ScoreAttention.masked_fill(mask == 0, -1e9)
+         ScoreAttention = F.softmax(ScoreAttention, dim=-1)
+         ScoreAttention = self.Dropout_Attention(ScoreAttention)
+         Z = ScoreAttention @ V
+         # Z dimension: (BatchSize, NumberOfHeads, T_Decoder, HeadDimension)
+         Z = Z.transpose(1, 2).contiguous().view(BatchSize, T_Decoder, self.DimAttention)
+         Z = self.Dropout_Residue(self.Projection(Z))
+         return Z
+
+ class FeedForward(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         if config[varables.DIM_FEEDFORWARD] == 0:
+             Dim_FeedForward = config[varables.DIM_ATTENTION] * 4
+         else:
+             Dim_FeedForward = config[varables.DIM_FEEDFORWARD]
+         self.Linear1 = nn.Linear(config[varables.DIM_EMBEDDING], Dim_FeedForward)
+         self.GELU = nn.GELU()
+         self.Linear2 = nn.Linear(Dim_FeedForward, config[varables.DIM_EMBEDDING])
+         self.Dropout = nn.Dropout(config[varables.RATE_DROPOUT])
+
+     def forward(self, x):
+         x = self.Linear1(x)
+         x = self.GELU(x)
+         x = self.Dropout(x)
+         x = self.Linear2(x)
+         return x
+
+ class DecoderBlock(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.LayerNorm2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.AttentionMasked = Attention(config)
+         self.AttentionCross = Attention(config)
+         self.FeedForward = FeedForward(config)
+
+     def forward(self, X_Decoder, Mask_Decoder):
+         X_Decoder = self.Dropout1(X_Decoder + self.AttentionMasked(self.LayerNorm1(X_Decoder), None, Mask_Decoder))
+         X_Decoder = self.Dropout2(X_Decoder + self.FeedForward(self.LayerNorm2(X_Decoder)))
+         return X_Decoder
+
+ class Model(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         # Variables
+         self.Dim_Attention = config[varables.DIM_ATTENTION]
+         self.Token_Padding_Decoder = config["Token_Padding_Decoder"]
+         # Embedding and positional encoding layers
+         self.Embedding_Decoder = nn.Embedding(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION])
+         self.pos_emb = PositionalEncoder(config)
+         # Dropout and normalization layers
+         self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         # Transformer layers (note: unlike GPT/model.py, this file reads NUM_DECODER_LAYERS)
+         self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_DECODER_LAYERS])])
+         # Output layer
+         self.head = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]), bias=False)
+         # Init
+         self.apply(self._init_weights)
+         self.optimizer = None
+         # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+
+     def _init_weights(self, module):
+         for p in module.parameters():
+             if p.dim() > 1:
+                 nn.init.xavier_uniform_(p)
+         # if isinstance(module, (nn.Linear, nn.Embedding)):
+         #     module.weight.data.normal_(mean=0.0, std=0.02)
+         #     if isinstance(module, nn.Linear) and module.bias is not None:
+         #         module.bias.data.zero_()
+         # elif isinstance(module, nn.LayerNorm):
+         #     module.bias.data.zero_()
+         #     module.weight.data.fill_(1.0)
+
+     def init_optimizers(self, train_config):
+         optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+         return optimizer
+
+     def init_scheduler(self, train_config):
+         scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+         return scheduler
+
+     def get_collate_fn(self, vocab_encoder, vocab_decoder):
+         def collate(results):
+             # Pad encoder and decoder id lists to the longest sequence in the batch.
+             X_Encoder = [a[0] for a in results]
+             X_Decoder = [a[1] for a in results]
+             boundary = -1
+             max_len_x = max([len(a) for a in X_Encoder])
+             max_len_y = max([len(a) for a in X_Decoder])
+             x = torch.tensor([(a + [vocab_encoder[varables.TOKEN_PAD]] * (max_len_x - len(a))) for a in X_Encoder], dtype=torch.long)
+             y = torch.tensor([(a + [vocab_decoder[varables.TOKEN_PAD]] * (max_len_y - len(a))) for a in X_Decoder], dtype=torch.long)
+             return x, y, boundary
+         return collate
+
+     def generate_masks(self, X_Decoder):
+         # Combine the padding mask with a causal (lower-triangular) mask.
+         T = X_Decoder.shape[1]
+         Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).unsqueeze(-2).repeat(1, 1, T, 1)
+         mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T).to(Mask_Decoder.device)
+         Mask_Decoder = Mask_Decoder.masked_fill(mask_tril == 0, 0)
+         return Mask_Decoder
+
+     def forward(self, X_Encoder, X_Decoder, Y_Decoder_Ref=None, boundary=None):
+         # X_Encoder is accepted for interface compatibility; this decoder-only model does not use it.
+         Mask_Decoder = self.generate_masks(X_Decoder)
+         # Preprocess: scaled token embedding plus positional encoding
+         X_Decoder = self.Dropout1(self.Embedding_Decoder(X_Decoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Decoder.size(1)))
+         # Decoder blocks
+         for decoder_block in self.decoder_blocks:
+             X_Decoder = decoder_block(X_Decoder, Mask_Decoder)
+         X_Decoder = self.LayerNorm1(X_Decoder)
+         Y_Decoder_Logits = self.head(X_Decoder)
+         loss = None
+         if Y_Decoder_Ref is not None:
+             loss = F.cross_entropy(Y_Decoder_Logits.view(-1, Y_Decoder_Logits.size(-1)), Y_Decoder_Ref.view(-1), ignore_index=self.Token_Padding_Decoder)
+         return Y_Decoder_Logits, loss
SCMG/models/GPT2/sampler.py ADDED
@@ -0,0 +1,85 @@
+ import random
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+ def set_seed(seed):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+ def top_k_logits(logits, k):
+     # Keep the k largest logits per row; everything else becomes -inf.
+     v, ix = torch.topk(logits, k)
+     out = logits.clone()
+     out[out < v[:, [-1]]] = -float('Inf')
+     return out
+
+ @torch.no_grad()
+ def sample(model, x, steps, temperature=1.0, sample=False, top_k=None):
+     block_size = model.get_block_size()
+     model.eval()
+     for k in range(steps):
+         x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+         logits, _ = model(x_cond)
+         logits = logits[:, -1, :] / temperature
+         if top_k is not None:
+             logits = top_k_logits(logits, top_k)
+         probs = F.softmax(logits, dim=-1)
+         if sample:
+             ix = torch.multinomial(probs, num_samples=1)
+         else:
+             _, ix = torch.topk(probs, k=1, dim=-1)
+         x = torch.cat((x, ix), dim=1)
+     return x
+
+ # Note: this second definition shadows the `sample` above.
+ @torch.no_grad()
+ def sample(model, x, steps, temperature=1.0, boundary=None):
+     block_size = model.get_block_size()
+     model.eval()
+     for k in range(steps):
+         x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+         logits, _ = model(x_cond, boundary=boundary)
+         logits = logits[:, -1, :] / temperature
+         probs = F.softmax(logits, dim=-1)
+         ix = torch.multinomial(probs, num_samples=1)
+         x = torch.cat((x, ix), dim=1)
+     return x
+
+ # Example scaffold-decoration input, left as a bare string literal:
+ 'L_5*C(=O)NCc1cccc(OC)c1.*c1nsc2ccccc12COc1cccc(CNC(=O)c2cccc(NC(=O)c3nsc4ccccc34)c2)c1'
+
+ # for i in range(1,21):
+ # Relies on module-level `vocab`, `inv`, `model`, and `test_valid` being defined elsewhere.
+ def sample_L(i, option='string'):
+     prefix = 'L_' + str(i)
+     string_input = prefix + '*O=C1NN=Cc2c1cccc2.*O=C(C1CC1)N1CCNCC1'
+     array_input = [vocab[a] for a in ['<bos>'] + list(string_input)]
+     boundary = [len(array_input)]
+     tensor_input = torch.tensor(array_input, device='cuda').unsqueeze(0).repeat(32, 1)
+     boundary = boundary * 32
+     tensor_output = sample(model, tensor_input, 250, boundary=boundary)
+     strings_output = []
+     for j in range(tensor_output.shape[0]):
+         list_string_output = [inv[a] for a in tensor_output[j, boundary[j]:].cpu().numpy() if a != vocab['<pad>']]
+         # if list_string_output[0] == '<bos>':
+         #     list_string_output = list_string_output[1:]
+         if list_string_output[-1] == '<eos>':
+             list_string_output = list_string_output[:-1]
+         string_output = ''.join(list_string_output)
+         strings_output.append(string_output)
+         print(string_output)
+     for j in range(tensor_output.shape[0]):
+         if test_valid(strings_output[j]):
+             print(1)
+         else:
+             print(0)
+
+ # logits, _ = model(tensor_input, boundary=boundary)
+
+ # Tokenized form of the example input above:
+ ['<bos>', 'L', '_', '5', '*', 'C', '(', '=', 'O', ')', 'N', 'C', 'c', '1', 'c', 'c', 'c', 'c', '(', 'O', 'C', ')', 'c', '1', '.', '*', 'c', '1', 'n', 's', 'c', '2', 'c', 'c', 'c', 'c', 'c', '1', '2', 'C', 'O', 'c', '1', 'c', 'c', 'c', 'c', '(', 'C', 'N', 'C', '(', '=', 'O', ')', 'c', '2', 'c', 'c', 'c', 'c', '(', 'N', 'C', '(', '=', 'O', ')', 'c', '3', 'n', 's', 'c', '4', 'c', 'c', 'c', 'c', 'c', '3', '4', ')', 'c', '2', ')', 'c', '1', '<eos>']
SCMG/models/LSTM/__init__.py ADDED
File without changes
SCMG/models/LSTM/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (173 Bytes).
SCMG/models/LSTM/__pycache__/model.cpython-310.pyc ADDED
Binary file (2.76 kB).
SCMG/models/LSTM/__pycache__/sampler.cpython-310.pyc ADDED
Binary file (1 kB).
SCMG/models/LSTM/__pycache__/trainer.cpython-310.pyc ADDED
Binary file (5.35 kB).
SCMG/models/LSTM/model.py ADDED
@@ -0,0 +1,48 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torch.nn.utils.rnn as rnn_utils
+ from SCMG.config import varables
+
+ class Model(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.vocab = config["vocab_encoder"]
+         # self.vocabulary = vocabulary
+         # self.hidden_size = config.hidden
+         # self.num_layers = config.num_layers
+         # self.dropout = config.dropout
+         # self.vocab_size = self.input_size = self.output_size = len(vocabulary)
+         self.embedding_layer = nn.Embedding(len(config["vocab_encoder"]), config[varables.DIM_EMBEDDING])
+         self.lstm_layer = nn.LSTM(config[varables.DIM_EMBEDDING], config[varables.DIM_LSTM],
+                                   config[varables.NUM_LAYERS], dropout=config[varables.RATE_DROPOUT],
+                                   batch_first=True)
+         self.linear_layer = nn.Linear(config[varables.DIM_LSTM], len(config["vocab_encoder"]))
+
+     def get_collate_fn(self, vocab_encoder, vocab_decoder):
+         def collate(results):
+             # Join each (encoder, decoder) pair with a <sep> token, then right-pad with <pad>.
+             x_in = None
+             y_in = [a[0] + [vocab_encoder[varables.TOKEN_SEP]] + a[1] for a in results]
+             # boundary = [a[2] for a in results]
+             max_len = max([len(a) for a in y_in])
+             y = torch.tensor([(a + [vocab_encoder[varables.TOKEN_PAD]] * (max_len - len(a))) for a in y_in], dtype=torch.long)
+             return x_in, y, 0
+         return collate
+
+     def init_optimizers(self, train_config):
+         optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+         return optimizer
+
+     def init_scheduler(self, train_config):
+         scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+         return scheduler
+
+     def forward(self, src, trg, trg_out, boundary=None):
+         # x = ([src, torch.tensor([self.vocab["<sep>"]]*x.size[0]).unsqueeze(1).to(x.device), trg], dim=1)
+         hiddens = None
+         x = self.embedding_layer(trg)
+         # x = rnn_utils.pack_padded_sequence(x, lengths, batch_first=True)
+         self.lstm_layer.flatten_parameters()
+         x, hiddens = self.lstm_layer(x, hiddens)
+         # x, _ = rnn_utils.pad_packed_sequence(x, batch_first=True)
+         logits = self.linear_layer(x)
+         loss = None
+         if trg_out is not None:
+             loss = F.cross_entropy(logits.view(-1, logits.size(-1)), trg_out.view(-1))
+         return logits, loss
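A minimal sketch (not part of the commit) of what the collate function above produces; the toy vocabulary and id lists are hypothetical.

    import torch

    vocab = {"<pad>": 0, "<sep>": 1, "<bos>": 2, "C": 3, "O": 4}  # hypothetical ids
    results = [([2, 3], [3, 4]), ([2, 3, 4], [4, 4, 3])]          # (encoder_ids, decoder_ids) pairs
    # Mirrors collate: join with <sep>, then right-pad with <pad> to the batch maximum.
    y_in = [a + [vocab["<sep>"]] + b for a, b in results]
    max_len = max(len(a) for a in y_in)
    y = torch.tensor([a + [vocab["<pad>"]] * (max_len - len(a)) for a in y_in])
    print(y)
    # tensor([[2, 3, 1, 3, 4, 0, 0],
    #         [2, 3, 4, 1, 4, 4, 3]])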
SCMG/models/LSTM/sampler.py ADDED
@@ -0,0 +1,20 @@
+ import torch
+ import torch.nn.functional as F
+ from MoleculeProcessing.utils.utils import *
+ from MoleculeProcessing.utils.utils_sample import *
+
+ def sample(model, vocab_bos, size_batch=32, size_block=70, temperature=1.):
+     model, device = load_to_device(model)
+     model.eval()
+     with torch.no_grad():
+         tensor_sampled = torch.zeros(size_batch, size_block + 1, dtype=torch.long, device=device)
+         tensor_sampled[:, 0] = vocab_bos
+         hiddens = None
+         for i in range(size_block):
+             input_current = tensor_sampled[:, [i]]
+             # Assumes a forward(input, hiddens) interface that returns (logits, hiddens).
+             probs, hiddens = model.forward(input_current, hiddens)
+             probs = probs[:, -1]
+             probs = probs * temperature  # note: multiplies the logits by T, the inverse of the usual logits / T
+             probs = F.softmax(probs, dim=-1)
+             sample = torch.distributions.categorical.Categorical(probs).sample()
+             tensor_sampled[:, i + 1] = sample
+     return tensor_sampled
SCMG/models/LSTM/trainer.py ADDED
@@ -0,0 +1,195 @@
+ import math
+ import logging
+ import os
+ import time
+ from tqdm import tqdm
+ import numpy as np
+ import pandas as pd
+
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ from torch.optim.lr_scheduler import LambdaLR
+ from torch.utils.data.dataloader import DataLoader
+ from MoleculeProcessing.utils.utils import *
+ from MoleculeProcessing.utils.utils_train import *
+ from MoleculeProcessing.config.config import *
+ logger = logging.getLogger(__name__)
+
+ class TrainerConfig:
+     learning_rate = 3e-4
+     betas = (0.9, 0.95)
+     grad_norm_clip = 1.0
+     weight_decay = 0.1
+     lr_decay = False
+     warmup_tokens = 375e6
+     final_tokens = 260e9
+     ckpt_path = None
+     num_workers = 0
+     config = None
+     epoch = 0
+
+     def __init__(self, **kwargs):
+         for k, v in kwargs.items():
+             setattr(self, k, v)
+
+ class Trainer:
+     def __init__(self, model, train_dataset, test_dataset, config):
+         self.model = model
+         self.train_dataset = train_dataset
+         self.test_dataset = test_dataset
+         self.config = config
+         # Continue training if a previous checkpoint exists.
+         self.train_log = init_train_log()
+         if os.path.exists(os.path.join(self.config.config.path_checkpoint, LOG_TRAIN_LATEST)):
+             self.train_log = pd.read_csv(os.path.join(self.config.config.path_checkpoint, LOG_TRAIN_LATEST))
+             self.config.epoch = self.train_log.shape[0]
+         if self.train_log.shape[0] > 0:
+             self.model = load_model(self.config.config.path_checkpoint, self.config.epoch - 1)
+             self.optimizer = load_optimizer(self.config.config.path_checkpoint, self.config.epoch - 1)
+             self.tokens = self.train_log.loc[self.config.epoch - 1, TOKENS]
+             self.scheduler = load_scheduler(self.config.config.path_checkpoint, self.config.epoch - 1)
+         else:
+             self.tokens = 0  # counter used for learning rate decay
+             self.optimizer = model.configure_optimizers(config)
+             self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, 10, 0.5)
+         self.criterion = nn.CrossEntropyLoss()
+         # Take over whatever GPUs are on the system.
+         self.device = 'cpu'
+         if torch.cuda.is_available():
+             self.device = torch.cuda.current_device()
+             self.model = torch.nn.DataParallel(self.model).to(self.device)
+
+     def save_checkpoint(self):
+         path_checkpoint = self.config.config.path_checkpoint
+         # DataParallel wrappers keep the raw model object in the .module attribute.
+         raw_model = self.model.module if hasattr(self.model, "module") else self.model
+         logger.info("saving %s", path_checkpoint)
+         # model
+         path_model_epoch = add_before_extension(os.path.join(path_checkpoint, MODEL_LATEST), str(self.config.epoch))
+         torch.save(raw_model, path_model_epoch)
+         # optimizer
+         path_optimizer_epoch = add_before_extension(os.path.join(path_checkpoint, OPTIMIZER_LATEST), str(self.config.epoch))
+         torch.save(self.optimizer, path_optimizer_epoch)
+         # scheduler
+         path_scheduler_epoch = add_before_extension(os.path.join(path_checkpoint, SCHEDULER_LATEST), str(self.config.epoch))
+         torch.save(self.scheduler, path_scheduler_epoch)
+         # train log: latest copy plus a per-epoch snapshot
+         self.train_log.to_csv(os.path.join(path_checkpoint, LOG_TRAIN_LATEST), index=False)
+         path_train_log_epoch = add_before_extension(os.path.join(path_checkpoint, LOG_TRAIN_LATEST), str(self.config.epoch))
+         self.train_log.to_csv(path_train_log_epoch, index=False)
+         # torch.save(self.token, os.path.join(path_checkpoint, 'tokens_' + self.config.epoch + '.pt'))
+
+     def train(self):
+         model, config = self.model, self.config
+         raw_model = model.module if hasattr(self.model, "module") else model
+         optimizer = self.optimizer
+         scheduler = self.scheduler
+         while self.config.epoch < config.config.epochs:
+             current_status = dict([[a, None] for a in self.train_log.columns])
+             current_status[EPOCH] = self.config.epoch
+             time_start = time.time()
+             current_status = self.run_epoch('train', current_status)
+             current_status[TIME_ELAPSED] = int(time.time() - time_start)
+             current_status[TOKENS] = self.tokens
+             if self.test_dataset is not None:
+                 current_status = self.run_epoch('test', current_status)
+             self.train_log.loc[self.config.epoch] = current_status
+             scheduler.step()
+             self.save_checkpoint()
+             self.config.epoch += 1
+
+     def run_epoch(self, split, current_status):
+         model = self.model
+         is_train = split == 'train'
+         model.train(is_train)
+         data = self.train_dataset if is_train else self.test_dataset
+         data.shuffle(random_state=self.config.epoch)
+         loader = DataLoader(data, shuffle=False, pin_memory=True,
+                             batch_size=self.config.config.size_batch,
+                             num_workers=self.config.num_workers)
+
+         losses = []
+         pbar = tqdm(enumerate(loader), total=len(loader)) if is_train else enumerate(loader)
+         for it, (x, y) in pbar:
+             # place data on the correct device
+             x = x.to(self.device)
+             y = y.to(self.device)
+
+             # forward the model
+             with torch.set_grad_enabled(is_train):
+                 outputs, _ = model.forward(x)
+                 loss = self.criterion(outputs.view(-1, outputs.shape[-1]), y.view(-1))
+                 loss = loss.mean()  # collapse all losses if they are scattered on multiple gpus
+                 losses.append(loss.item())
+
+             if is_train:
+                 # backprop and update the parameters
+                 model.zero_grad()
+                 loss.backward()
+                 torch.nn.utils.clip_grad_norm_(model.parameters(), self.config.grad_norm_clip)
+                 self.optimizer.step()
+
+                 # decay the learning rate based on our progress
+                 if self.config.lr_decay:
+                     self.tokens += (y >= 0).sum()  # number of tokens processed this step (i.e. label is not -100)
+                     if self.tokens < self.config.warmup_tokens:
+                         # linear warmup
+                         lr_mult = float(self.tokens) / float(max(1, self.config.warmup_tokens))
+                     else:
+                         # cosine learning rate decay
+                         progress = float(self.tokens - self.config.warmup_tokens) / float(max(1, self.config.final_tokens - self.config.warmup_tokens))
+                         lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
+                     lr = self.config.learning_rate * lr_mult
+                     for param_group in self.optimizer.param_groups:
+                         param_group['lr'] = lr
+                 else:
+                     lr = self.config.learning_rate
+                 current_status[LR] = lr
+
+                 # report progress
+                 pbar.set_description(f"epoch {self.config.epoch+1} iter {it}: train loss {loss.item():.5f}. lr {lr:e}")
+         current_status[split + '_loss'] = float(np.mean(losses))
+         if not is_train:
+             test_loss = float(np.mean(losses))
+             logger.info("test loss: %f", test_loss)
+         return current_status
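A minimal sketch (not part of the commit) of the linear-warmup-plus-cosine-decay multiplier implemented in run_epoch above; the token counts are arbitrary.

    import math

    def lr_mult(tokens, warmup_tokens=375e6, final_tokens=260e9):
        # Mirrors the schedule in Trainer.run_epoch.
        if tokens < warmup_tokens:
            return tokens / max(1, warmup_tokens)  # linear warmup
        progress = (tokens - warmup_tokens) / max(1, final_tokens - warmup_tokens)
        return max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))  # cosine decay, floored at 0.1

    for t in (1e6, 375e6, 130e9, 260e9):
        print(f"{t:.0f} tokens -> lr_mult {lr_mult(t):.4f}")
    # ramps up to 1.0 at the end of warmup, then decays to the 0.1 floor by final_tokens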
SCMG/models/Reinvent/__init__.py ADDED
File without changes
SCMG/models/Reinvent/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (177 Bytes).
SCMG/models/Reinvent/__pycache__/model copy 2.cpython-310.pyc ADDED
Binary file (14.4 kB).
SCMG/models/Reinvent/__pycache__/model copy.cpython-310.pyc ADDED
Binary file (8.39 kB).
SCMG/models/Reinvent/__pycache__/model.cpython-310.pyc ADDED
Binary file (8.79 kB).
SCMG/models/Reinvent/__pycache__/sampler.cpython-310.pyc ADDED
Binary file (3.17 kB).
SCMG/models/Reinvent/model copy 2.py ADDED
@@ -0,0 +1,420 @@
+ import math
+ import logging
+
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+ logger = logging.getLogger(__name__)
+ from SCMG.config import varables
+
+ # class ModelConfig():
+ #     rate_dropout_embedding = 0.1
+ #     rate_dropout_residue = 0.1
+ #     rate_dropout_attention = 0.1
+ #     block_size = 125
+ #     def __init__(self, size_vocab, **kwargs):
+ #         self.size_vocab = size_vocab
+ #         for k, v in kwargs.items():
+ #             setattr(self, k, v)
+
+ class CausalSelfAttention(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+         self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+         self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+                              .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+         self.n_head = config[varables.NUM_HEADS]
+         self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
+         self.attention_features = config[varables.DIM_ATTENTION]
+
+     def forward(self, x, layer_past=None):
+         B, T, C = x.size()
+         k = self.key(x).view(B, T, self.n_head, self.single_head_dim).transpose(1, 2)
+         q = self.query(x).view(B, T, self.n_head, self.single_head_dim).transpose(1, 2)
+         v = self.value(x).view(B, T, self.n_head, self.single_head_dim).transpose(1, 2)
+         att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
+         att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float('-inf'))
+         att = F.softmax(att, dim=-1)
+         att = self.dropout_attention(att)
+         y = att @ v
+         y = y.transpose(1, 2).contiguous().view(B, T, self.attention_features)
+         y = self.dropout_residue(self.projection(y))
+         return y
+
+ class CrossAttention(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+         self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+         self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+         self.n_head = config[varables.NUM_HEADS]
+         self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
+         self.attention_features = config[varables.DIM_ATTENTION]
+         self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+                              .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+
+     def forward(self, x_encoder, x_decoder, layer_past=None):
+         B_encoder, T_encoder, C_encoder = x_encoder.size()
+         B_decoder, T_decoder, C_decoder = x_decoder.size()
+         k = self.key(x_encoder).view(B_encoder, T_encoder, self.n_head, self.single_head_dim).transpose(1, 2)
+         q = self.query(x_decoder).view(B_encoder, T_decoder, self.n_head, self.single_head_dim).transpose(1, 2)
+         v = self.value(x_encoder).view(B_encoder, T_encoder, self.n_head, self.single_head_dim).transpose(1, 2)
+         att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
+         att = att.masked_fill(self.mask[:, :, :T_decoder, :T_encoder] == 0, float('-inf'))
+         att = F.softmax(att, dim=-1)
+         att = self.dropout_attention(att)
+         y = att @ v
+         y = y.transpose(1, 2).contiguous().view(B_encoder, T_decoder, self.attention_features)
+         y = self.dropout_residue(self.projection(y))
+         return y
+
+ class EncoderBlock(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.attn = CausalSelfAttention(config)
+         self.mlp = nn.Sequential(
+             nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
+             nn.GELU(),
+             nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
+             nn.Dropout(config[varables.RATE_DROPOUT]),
+         )
+
+     def forward(self, x):
+         x = x + self.attn(self.ln1(x))
+         x = x + self.mlp(self.ln2(x))
+         return x
+
+ class DecoderBlock(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.masked_attn = CausalSelfAttention(config)
+         self.cross_attn = CrossAttention(config)
+         self.mlp = nn.Sequential(
+             nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
+             nn.GELU(),
+             nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
+             nn.Dropout(config[varables.RATE_DROPOUT]),
+         )
+
+     def forward(self, x_encoder, x):
+         x = x + self.masked_attn(self.ln1(x))
+         x = x + self.cross_attn(x_encoder, self.ln1(x))
+         x = x + self.mlp(self.ln2(x))
+         return x
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+
+ class Norm(nn.Module):
+     def __init__(self, d_model, eps=1e-6):
+         super().__init__()
+         self.size = d_model
+         # create two learnable parameters to calibrate normalisation
+         self.alpha = nn.Parameter(torch.ones(self.size))
+         self.bias = nn.Parameter(torch.zeros(self.size))
+         self.eps = eps
+
+     def forward(self, x):
+         norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) \
+                / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
+         return norm
+
+ def attention(q, k, v, d_k, mask=None, dropout=None):
+     scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
+     if mask is not None:
+         mask = mask.unsqueeze(1)
+         scores = scores.masked_fill(mask == 0, -1e9)
+     scores = F.softmax(scores, dim=-1)
+     if dropout is not None:
+         scores = dropout(scores)
+     output = torch.matmul(scores, v)
+     return output
+
+ class MultiHeadAttention(nn.Module):
+     def __init__(self, heads, d_model, dropout=0.1):
+         super().__init__()
+         self.d_model = d_model
+         self.d_k = d_model // heads
+         self.h = heads
+         self.q_linear = nn.Linear(d_model, d_model)
+         self.v_linear = nn.Linear(d_model, d_model)
+         self.k_linear = nn.Linear(d_model, d_model)
+         self.dropout = nn.Dropout(dropout)
+         self.out = nn.Linear(d_model, d_model)
+
+     def forward(self, q, k, v, mask=None):
+         bs = q.size(0)
+         # perform linear operation and split into N heads
+         k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
+         q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
+         v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
+         # transpose to get dimensions bs * N * sl * d_model
+         k = k.transpose(1, 2)
+         q = q.transpose(1, 2)
+         v = v.transpose(1, 2)
+         # calculate attention using the function defined above
+         scores = attention(q, k, v, self.d_k, mask, self.dropout)
+         # concatenate heads and put through final linear layer
+         concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
+         output = self.out(concat)
+         return output
+
+ class FeedForward(nn.Module):
+     def __init__(self, d_model, d_ff=2048, dropout=0.1):
+         super().__init__()
+         # We set d_ff as a default to 2048
+         self.linear_1 = nn.Linear(d_model, d_ff)
+         self.dropout = nn.Dropout(dropout)
+         self.linear_2 = nn.Linear(d_ff, d_model)
+
+     def forward(self, x):
+         x = self.dropout(F.relu(self.linear_1(x)))
+         x = self.linear_2(x)
+         return x
+
+ import torch
+ import torch.nn as nn
+ import copy
+
+ class EncoderLayer(nn.Module):
+     def __init__(self, d_model, heads, dropout=0.1):
+         super().__init__()
+         self.norm_1 = Norm(d_model)
+         self.norm_2 = Norm(d_model)
+         self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
+         self.ff = FeedForward(d_model, dropout=dropout)
+         self.dropout_1 = nn.Dropout(dropout)
+         self.dropout_2 = nn.Dropout(dropout)
+
+     def forward(self, x, mask):
+         x2 = self.norm_1(x)
+         x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
+         x2 = self.norm_2(x)
+         x = x + self.dropout_2(self.ff(x2))
+         return x
+
+ # build a decoder layer with two multi-head attention layers and
+ # one feed-forward layer
+ class DecoderLayer(nn.Module):
+     def __init__(self, d_model, heads, dropout=0.1):
+         super().__init__()
+         self.norm_1 = Norm(d_model)
+         self.norm_2 = Norm(d_model)
+         self.norm_3 = Norm(d_model)
+         self.dropout_1 = nn.Dropout(dropout)
+         self.dropout_2 = nn.Dropout(dropout)
+         self.dropout_3 = nn.Dropout(dropout)
+         self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
+         self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
+         self.ff = FeedForward(d_model, dropout=dropout)
+
+     def forward(self, x, e_outputs, src_mask, trg_mask):
+         x2 = self.norm_1(x)
+         x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
+         x2 = self.norm_2(x)
+         x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
+         x2 = self.norm_3(x)
+         x = x + self.dropout_3(self.ff(x2))
+         return x
+
+ import torch
+ import torch.nn as nn
+ import math
+ from torch.autograd import Variable
+
+ class Embedder(nn.Module):
+     def __init__(self, vocab_size, d_model):
294
+ super().__init__()
295
+ self.d_model = d_model
296
+ self.embed = nn.Embedding(vocab_size, d_model)
297
+ def forward(self, x):
298
+ return self.embed(x)
299
+
300
+ class PositionalEncoder(nn.Module):
301
+ def __init__(self, d_model, max_seq_len = 200, dropout = 0.1):
302
+ super().__init__()
303
+ self.d_model = d_model
304
+ self.dropout = nn.Dropout(dropout)
305
+ # create constant 'pe' matrix with values dependent on
306
+ # pos and i
307
+ pe = torch.zeros(max_seq_len, d_model)
308
+ for pos in range(max_seq_len):
309
+ for i in range(0, d_model, 2):
310
+ pe[pos, i] = \
311
+ math.sin(pos / (10000 ** ((2 * i)/d_model)))
312
+ pe[pos, i + 1] = \
313
+ math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
314
+ pe = pe.unsqueeze(0)
315
+ self.register_buffer('pe', pe)
316
+
317
+
318
+ def forward(self, x):
319
+ # make embeddings relatively larger
320
+ x = x * math.sqrt(self.d_model)
321
+ # add the constant positional encoding to the embedding
322
+ seq_len = x.size(1)
323
+ pe = Variable(self.pe[:,:seq_len], requires_grad=False)
324
+ if x.is_cuda:
325
+ pe = pe.cuda()  # .cuda() is not in-place; rebind so the addition below runs on the GPU
326
+ x = x + pe
327
+ return self.dropout(x)
328
+
329
+ def get_clones(module, N):
330
+ return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
331
+
332
+ class Encoder(nn.Module):
333
+ def __init__(self, vocab_size, d_model, N, heads, dropout):
334
+ super().__init__()
335
+ self.N = N
336
+ self.embed = Embedder(vocab_size, d_model)
337
+ self.pe = PositionalEncoder(d_model, dropout=dropout)
338
+ self.layers = get_clones(EncoderLayer(d_model, heads, dropout), N)
339
+ self.norm = Norm(d_model)
340
+ def forward(self, src, mask):
341
+ x = self.embed(src)
342
+ x = self.pe(x)
343
+ for i in range(self.N):
344
+ x = self.layers[i](x, mask)
345
+ return self.norm(x)
346
+
347
+ class Decoder(nn.Module):
348
+ def __init__(self, vocab_size, d_model, N, heads, dropout):
349
+ super().__init__()
350
+ self.N = N
351
+ self.embed = Embedder(vocab_size, d_model)
352
+ self.pe = PositionalEncoder(d_model, dropout=dropout)
353
+ self.layers = get_clones(DecoderLayer(d_model, heads, dropout), N)
354
+ self.norm = Norm(d_model)
355
+ def forward(self, trg, e_outputs, src_mask, trg_mask):
356
+ x = self.embed(trg)
357
+ x = self.pe(x)
358
+ for i in range(self.N):
359
+ x = self.layers[i](x, e_outputs, src_mask, trg_mask)
360
+ return self.norm(x)
361
+
362
+ class Model(nn.Module):
363
+ def __init__(self, config):
364
+ super().__init__()
365
+ self.encoder = Encoder(len(config["vocab_encoder"]), config[varables.DIM_ATTENTION], config[varables.NUM_LAYERS], config[varables.NUM_HEADS], config[varables.RATE_DROPOUT])
366
+ self.decoder = Decoder(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION], config[varables.NUM_LAYERS], config[varables.NUM_HEADS], config[varables.RATE_DROPOUT])
367
+ self.out = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]))
368
+ # self.tok_emb = nn.Embedding(config[varables.SIZE_VOCAB], config[varables.DIM_EMBEDDING])
369
+ # self.pos_emb = nn.Parameter(torch.zeros(1, config[varables.SIZE_BLOCK], config[varables.DIM_EMBEDDING]))
370
+ # self.drop = nn.Dropout(config[varables.RATE_DROPOUT])
371
+ # self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
372
+ # self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
373
+ # self.blocks = nn.Sequential(*[DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
374
+ # self.ln_f = nn.LayerNorm(config[varables.DIM_EMBEDDING])
375
+ # self.head = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.SIZE_VOCAB], bias=False)
376
+ self.block_size = config[varables.SIZE_BLOCK]  # restored: get_block_size() below depends on it
377
+ # self.apply(self._init_weights)
378
+ # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
379
+ self.optimizer = None
380
+
381
+ def get_block_size(self):
382
+ return self.block_size
383
+
384
+ def _init_weights(self, module):
385
+ if isinstance(module, (nn.Linear, nn.Embedding)):
386
+ module.weight.data.normal_(mean=0.0, std=0.02)
387
+ if isinstance(module, nn.Linear) and module.bias is not None:
388
+ module.bias.data.zero_()
389
+ elif isinstance(module, nn.LayerNorm):
390
+ module.bias.data.zero_()
391
+ module.weight.data.fill_(1.0)
392
+ def init_optimizers(self,train_config):
393
+ optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
394
+ return optimizer
395
+ def init_scheduler(self,train_config):
396
+ scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
397
+ return scheduler
398
+ def get_collate_fn(self, vocab_encoder,vocab_decoder):
399
+ def collate(results):
400
+ x_in = [a[0] for a in results]
401
+ y_in = [a[1] for a in results]
402
+ boundary = -1
403
+ max_len_x = max([len(a) for a in x_in])
404
+ max_len_y = max([len(a) for a in y_in])
405
+ x = torch.tensor([(a+[vocab_encoder[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in x_in],dtype=torch.long)
406
+ y = torch.tensor([(a+[vocab_decoder[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in y_in],dtype=torch.long)
407
+ return x,y,boundary
408
+ return collate
409
+ def forward(self, src, trg, trg_out, boundary=None):
410
+ src_mask = None
411
+ trg_mask = torch.tril(torch.ones(trg.shape[1], trg.shape[1], device=trg.device)).view(1, trg.shape[1], trg.shape[1])  # 3-D: attention() adds the head dim itself via unsqueeze(1)
412
+ e_outputs = self.encoder(src, src_mask)
413
+ d_output = self.decoder(trg, e_outputs, src_mask, trg_mask)
414
+ logits = self.out(d_output)
415
+ loss = None
416
+ if trg_out is not None:
417
+ loss = F.cross_entropy(logits.view(-1, logits.size(-1)), trg_out.view(-1))
418
+ return logits, loss
419
+
420
+ # mark test
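All the attention variants in this commit share the same multi-head reshape, and the CrossAttention above additionally reuses a causal triangular mask sliced to (T_decoder, T_encoder), which is unusual for cross-attention; a source padding mask, as in SCMG/models/Reinvent/model.py below, is the more common choice. A minimal standalone shape check of the reshape pattern, with made-up sizes rather than values from the SCMG config:

import math
import torch
import torch.nn.functional as F

B, T_enc, T_dec, n_head, dim = 2, 7, 5, 4, 64      # illustrative sizes only
head_dim = dim // n_head

# (B, T, dim) -> (B, n_head, T, head_dim), as in CrossAttention.forward above
k = torch.randn(B, T_enc, dim).view(B, T_enc, n_head, head_dim).transpose(1, 2)
q = torch.randn(B, T_dec, dim).view(B, T_dec, n_head, head_dim).transpose(1, 2)
v = torch.randn(B, T_enc, dim).view(B, T_enc, n_head, head_dim).transpose(1, 2)

att = F.softmax((q @ k.transpose(-2, -1)) / math.sqrt(head_dim), dim=-1)   # (B, h, T_dec, T_enc)
y = (att @ v).transpose(1, 2).contiguous().view(B, T_dec, dim)             # back to (B, T_dec, dim)
assert y.shape == (B, T_dec, dim)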
SCMG/models/Reinvent/model copy.py ADDED
@@ -0,0 +1,187 @@
1
+ import math
2
+ import logging
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ from torch.nn import functional as F
7
+
8
+ logger = logging.getLogger(__name__)
9
+ from SCMG.config import varables
10
+
11
+ # class ModelConfig():
12
+ # rate_dropout_embedding = 0.1
13
+ # rate_dropout_residue = 0.1
14
+ # rate_dropout_attention = 0.1
15
+ # block_size=125
16
+ # def __init__(self, size_vocab, **kwargs):
17
+ # self.size_vocab = size_vocab
18
+ # for k,v in kwargs.items():
19
+ # setattr(self, k, v)
20
+
21
+ class CausalSelfAttention(nn.Module):
22
+ def __init__(self, config):
23
+ super().__init__()
24
+ assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
25
+ self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
26
+ self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
27
+ self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
28
+ self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
29
+ self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
30
+ self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
31
+ self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
32
+ .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
33
+ self.n_head = config[varables.NUM_HEADS]
34
+ self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
35
+ self.attention_features = config[varables.DIM_ATTENTION]
36
+
37
+ def forward(self, x, layer_past=None):
38
+ B, T, C = x.size()
39
+ k = self.key(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
40
+ q = self.query(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
41
+ v = self.value(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
42
+ att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
43
+ att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf'))
44
+ att = F.softmax(att, dim=-1)
45
+ att = self.dropout_attention(att)
46
+ y = att @ v
47
+ y = y.transpose(1, 2).contiguous().view(B, T, self.attention_features)
48
+ y = self.dropout_residue(self.projection(y))
49
+ return y
50
+
51
+
52
+ class CrossAttention(nn.Module):
53
+ def __init__(self, config):
54
+ super().__init__()
55
+ assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
56
+ self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
57
+ self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
58
+ self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
59
+ self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
60
+ self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
61
+ self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
62
+ self.n_head = config[varables.NUM_HEADS]
63
+ self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
64
+ self.attention_features = config[varables.DIM_ATTENTION]
65
+ self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
66
+ .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
67
+
68
+ def forward(self, x_encoder,x_decoder, layer_past=None):
69
+ B_encoder, T_encoder, C_encoder = x_encoder.size()
70
+ B_decoder, T_decoder, C_decoder = x_decoder.size()
71
+ k = self.key( x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
72
+ q = self.query(x_decoder).view(B_encoder, T_decoder, self.n_head,self.single_head_dim).transpose(1, 2)
73
+ v = self.value(x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
74
+ att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
75
+ att = att.masked_fill(self.mask[:,:,:T_decoder,:T_encoder] == 0, float('-inf'))  # NOTE: reuses the causal tril mask for cross-attention; a source padding mask is more usual
76
+ att = F.softmax(att, dim=-1)
77
+ att = self.dropout_attention(att)
78
+ y = att @ v
79
+ y = y.transpose(1, 2).contiguous().view(B_encoder, T_decoder, self.attention_features)
80
+ y = self.dropout_residue(self.projection(y))
81
+ return y
82
+
83
+
84
+
85
+
86
+ class EncoderBlock(nn.Module):
87
+ def __init__(self, config):
88
+ super().__init__()
89
+ self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
90
+ self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
91
+ self.attn = CausalSelfAttention(config)
92
+ self.mlp = nn.Sequential(
93
+ nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
94
+ nn.GELU(),
95
+ nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
96
+ nn.Dropout(config[varables.RATE_DROPOUT]),
97
+ )
98
+
99
+ def forward(self, x):
100
+ # pre-norm residual: self-attention, then feed-forward MLP
101
+ x = x + self.attn(self.ln1(x))
102
+ x = x + self.mlp(self.ln2(x))
103
+ return x
104
+
105
+ class DecoderBlock(nn.Module):
106
+ def __init__(self, config):
107
+ super().__init__()
108
+ self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
109
+ self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
110
+ self.masked_attn = CausalSelfAttention(config)
111
+ self.cross_attn = CrossAttention(config)
112
+ self.mlp = nn.Sequential(
113
+ nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
114
+ nn.GELU(),
115
+ nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
116
+ nn.Dropout(config[varables.RATE_DROPOUT]),
117
+ )
118
+
119
+ def forward(self, x_encoder,x):
120
+ # pre-norm residual: masked self-attention, cross-attention, then feed-forward MLP
121
+ x = x + self.masked_attn(self.ln1(x))
122
+ x = x + self.cross_attn(x_encoder,self.ln1(x))
123
+ x = x + self.mlp(self.ln2(x))
124
+ return x
125
+
126
+ class Model(nn.Module):
127
+ def __init__(self, config):
128
+ super().__init__()
129
+ self.tok_emb = nn.Embedding(config[varables.SIZE_VOCAB], config[varables.DIM_EMBEDDING])
130
+ self.pos_emb = nn.Parameter(torch.zeros(1, config[varables.SIZE_BLOCK], config[varables.DIM_EMBEDDING]))
131
+ self.drop = nn.Dropout(config[varables.RATE_DROPOUT])
132
+ self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
133
+ self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
134
+ # self.blocks = nn.Sequential(*[DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
135
+ self.ln_f = nn.LayerNorm(config[varables.DIM_EMBEDDING])
136
+ self.head = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.SIZE_VOCAB], bias=False)
137
+ self.block_size = config[varables.SIZE_BLOCK]
138
+ self.apply(self._init_weights)
139
+ logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
140
+ self.optimizer = None
141
+
142
+ def get_block_size(self):
143
+ return self.block_size
144
+
145
+ def _init_weights(self, module):
146
+ if isinstance(module, (nn.Linear, nn.Embedding)):
147
+ module.weight.data.normal_(mean=0.0, std=0.02)
148
+ if isinstance(module, nn.Linear) and module.bias is not None:
149
+ module.bias.data.zero_()
150
+ elif isinstance(module, nn.LayerNorm):
151
+ module.bias.data.zero_()
152
+ module.weight.data.fill_(1.0)
153
+ def init_optimizers(self,train_config):
154
+ optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
155
+ return optimizer
156
+ def init_scheduler(self,train_config):
157
+ scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
158
+ return scheduler
159
+ def get_collate_fn(self, vocab):
160
+ def collate(results):
161
+ x_in = [a[0] for a in results]
162
+ y_in = [a[1] for a in results]
163
+ boundary = -1
164
+ max_len_x = max([len(a) for a in x_in])
165
+ max_len_y = max([len(a) for a in y_in])
166
+ x = torch.tensor([(a+[vocab[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in x_in],dtype=torch.long)
167
+ y = torch.tensor([(a+[vocab[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in y_in],dtype=torch.long)
168
+ return x,y,boundary
169
+ return collate
170
+
171
+ def forward(self, x_in, y_in, y_out=None,boundary=None):
172
+ x_in = self.drop(self.tok_emb(x_in) + self.pos_emb[:, :x_in.size()[1], :])
173
+ y_in = self.drop(self.tok_emb(y_in) + self.pos_emb[:, :y_in.size()[1], :])
174
+ #
175
+ for encoder_block in self.encoder_blocks:
176
+ x_in = encoder_block(x_in)
177
+ x_in = self.ln_f(x_in)
178
+ for decoder_block in self.decoder_blocks:
179
+ y_in = decoder_block(x_in,y_in)
180
+ y_in = self.ln_f(y_in)
181
+ logits = self.head(y_in)
182
+ loss = None
183
+ if y_out is not None:
184
+ loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y_out.view(-1))
185
+ return logits, loss
186
+
187
+ # mark test
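The collate function returned by get_collate_fn only right-pads each ragged batch to its longest sequence. A self-contained sketch of that padding step with a toy vocabulary in which the pad token is index 0 (illustrative values, not the real SCMG vocab):

import torch

PAD = 0                                             # assumed pad index, for illustration
batch = [([5, 6], [7]), ([5], [7, 8, 9])]           # (x_in, y_in) token-id pairs
x_in = [a[0] for a in batch]
y_in = [a[1] for a in batch]
max_x = max(len(a) for a in x_in)
max_y = max(len(a) for a in y_in)
x = torch.tensor([a + [PAD] * (max_x - len(a)) for a in x_in], dtype=torch.long)
y = torch.tensor([a + [PAD] * (max_y - len(a)) for a in y_in], dtype=torch.long)
print(x)   # tensor([[5, 6], [5, 0]])
print(y)   # tensor([[7, 0, 0], [7, 8, 9]])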
SCMG/models/Reinvent/model.py ADDED
@@ -0,0 +1,278 @@
1
+ import math
2
+ import logging
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ from torch.nn import functional as F
7
+
8
+ # logger = logging.getLogger(__name__)
9
+ from SCMG.config import varables
10
+ from torch.autograd import Variable
11
+
12
+ # class PositionalEncoder(nn.Module):
13
+ # def __init__(self, config):
14
+ # super().__init__()
15
+ # pe = torch.zeros(config[varables.SIZE_BLOCK], config[varables.DIM_ATTENTION])
16
+ # for pos in range(config[varables.SIZE_BLOCK]):
17
+ # for i in range(0, config[varables.DIM_ATTENTION], 2):
18
+ # pe[pos, i] = \
19
+ # math.sin(pos / (10000 ** ((2 * i)/config[varables.DIM_ATTENTION])))
20
+ # pe[pos, i + 1] = \
21
+ # math.cos(pos / (10000 ** ((2 * (i + 1))/config[varables.DIM_ATTENTION])))
22
+ # pe = pe.unsqueeze(0)
23
+ # self.register_buffer('pe', pe)
24
+ # def forward(self, T):
25
+ # #add constant to embedding
26
+ # x = Variable(self.pe[:,:T], requires_grad=False)
27
+ # return x
28
+
29
+
30
+
31
+ class PositionalEncoder(nn.Module):
32
+ def __init__(self, config):
33
+ super(PositionalEncoder, self).__init__()
34
+ self.Dropout = nn.Dropout(p=config[varables.RATE_DROPOUT])
35
+ max_len = config[varables.SIZE_BLOCK]
36
+ pe = torch.zeros(max_len, config[varables.DIM_ATTENTION])
37
+ position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
38
+ div_term = torch.exp(torch.arange(0, config[varables.DIM_ATTENTION], 2).float() * (-math.log(10000.0) / config[varables.DIM_ATTENTION]))
39
+ pe[:, 0::2] = torch.sin(position * div_term)
40
+ pe[:, 1::2] = torch.cos(position * div_term)
41
+ pe = pe.unsqueeze(0)
42
+ self.register_buffer('pe', pe)
43
+ def forward(self, T):
44
+ x = self.Dropout(self.pe[:,:T, :])
45
+ return x
46
+
47
+
48
+
49
+ class Attention(nn.Module):
50
+ def __init__(self, config):
51
+ super().__init__()
52
+ assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
53
+ self.Key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
54
+ self.Query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
55
+ self.Value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
56
+ self.Dropout_Attention = nn.Dropout(config[varables.RATE_DROPOUT])
57
+ self.Dropout_Residue = nn.Dropout(config[varables.RATE_DROPOUT])
58
+ self.Projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
59
+ self.NumberOfHeads = config[varables.NUM_HEADS]
60
+ self.DimHead = config[varables.DIM_ATTENTION] // self.NumberOfHeads
61
+ self.DimAttention = config[varables.DIM_ATTENTION]
62
+
63
+ def forward(self, X_1,X_2, mask=None):
64
+ if X_2 is None:
65
+ X_2 = X_1
66
+ BatchSize, T_Encoder, _ = X_1.size()
67
+ BatchSize, T_Decoder, _ = X_2.size()
68
+ K = self.Key( X_1).view(BatchSize, T_Encoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
69
+ Q = self.Query(X_2).view(BatchSize, T_Decoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
70
+ V = self.Value(X_1).view(BatchSize, T_Encoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
71
+ # k,q,v dimension: (BatchSize, SequenceSize, NumberOfHeads, HeadDimension) 3,4,5,16
72
+ ScoreAttention = (Q @ K.transpose(-2, -1)) / math.sqrt(self.DimHead)
73
+ ScoreAttention = ScoreAttention.masked_fill(mask==0, -1e9)
74
+ ScoreAttention = F.softmax(ScoreAttention, dim=-1)
75
+ ScoreAttention = self.Dropout_Attention(ScoreAttention)
76
+ # k.transpose(-2,-1): 3,4,16,5
77
+ # (q@(k.transpose(-2,-1))): 3,4,5,5
78
+ Z = ScoreAttention @ V
79
+ # y dimension: 3,4,5,16
80
+ Z = Z.transpose(1, 2).contiguous().view(BatchSize, T_Decoder, self.DimAttention)
81
+ # y dimension: 3,5,64
82
+ Z = self.Dropout_Residue(self.Projection(Z))
83
+ return Z
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+ class FeedForward(nn.Module):
95
+ def __init__(self, config):
96
+ super().__init__()
97
+ if config[varables.DIM_FEEDFORWARD] == 0:
98
+ Dim_FeedForward = config[varables.DIM_ATTENTION] *4
99
+ else:
100
+ Dim_FeedForward = config[varables.DIM_FEEDFORWARD]
101
+ self.Linear1 = nn.Linear(config[varables.DIM_EMBEDDING], Dim_FeedForward)
102
+ self.GELU = nn.GELU()
103
+ self.Linear2 = nn.Linear(Dim_FeedForward, config[varables.DIM_EMBEDDING])
104
+ self.Dropout = nn.Dropout(config[varables.RATE_DROPOUT])
105
+
106
+ def forward(self,x):
107
+ x = self.Linear1(x)
108
+ x = self.GELU (x)
109
+ x = self.Dropout(x)
110
+ x = self.Linear2(x)
111
+ return x
112
+
113
+
114
+
115
+
116
+ class EncoderBlock(nn.Module):
117
+ def __init__(self, config):
118
+ super().__init__()
119
+ self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
120
+ self.LayerNorm2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
121
+ self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
122
+ self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
123
+ self.Attention = Attention( config)
124
+ self.FeedForward = FeedForward(config)
125
+
126
+ def forward(self, X_Encoder,Mask_Encoder):
127
+ X_Encoder = self.Dropout1(X_Encoder + self.Attention (self.LayerNorm1(X_Encoder), None, Mask_Encoder))
128
+ X_Encoder = self.Dropout2(X_Encoder + self.FeedForward(self.LayerNorm2(X_Encoder)))
129
+ return X_Encoder
130
+
131
+ class DecoderBlock(nn.Module):
132
+ def __init__(self, config):
133
+ super().__init__()
134
+ self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
135
+ self.LayerNorm2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
136
+ self.LayerNorm3 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
137
+ self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
138
+ self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
139
+ self.Dropout3 = nn.Dropout(config[varables.RATE_DROPOUT])
140
+ self.AttentionMasked = Attention( config)
141
+ self.AttentionCross = Attention( config)
142
+ self.FeedForward = FeedForward(config)
143
+
144
+ def forward(self, X_Encoder,X_Decoder,Mask_Cross,Mask_Decoder):
145
+ X_Decoder = self.Dropout1(X_Decoder + self.AttentionMasked(self.LayerNorm1(X_Decoder), None, Mask_Decoder))
146
+ X_Decoder = self.Dropout2(X_Decoder + self.AttentionCross ( X_Encoder, self.LayerNorm2(X_Decoder), Mask_Cross ))
147
+ X_Decoder = self.Dropout3(X_Decoder + self.FeedForward (self.LayerNorm3(X_Decoder) ))
148
+ return X_Decoder
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
+ class Model(nn.Module):
169
+ def __init__(self, config):
170
+ super().__init__()
171
+ # Variables
172
+ self.Dim_Attention = config[varables.DIM_ATTENTION]
173
+ self.Token_Padding_Encoder = config["Token_Padding_Encoder"]
174
+ self.Token_Padding_Decoder = config["Token_Padding_Decoder"]
175
+ # Embedding and positional encoding layers
176
+ self.Embedding_Encoder = nn.Embedding(len(config["vocab_encoder"]), config[varables.DIM_ATTENTION])
177
+ self.Embedding_Decoder = nn.Embedding(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION])
178
+ self.pos_emb = PositionalEncoder(config)
179
+ # Dropout and normalization layers
180
+ self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
181
+ self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
182
+ self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
183
+ self.LayerNorm2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
184
+ # Transformer layers
185
+ self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
186
+ self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
187
+ # Output layer
188
+ self.head = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]), bias=False)
189
+ # Init
190
+ self.apply(self._init_weights)
191
+ self.optimizer = None
192
+ # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
193
+
194
+ def _init_weights(self, module):
195
+ for p in module.parameters(recurse=False):  # recurse=False: self.apply() already visits every submodule once
196
+ if p.dim() > 1:
197
+ nn.init.xavier_uniform_(p)
198
+ # if isinstance(module, (nn.Linear, nn.Embedding)):
199
+ # module.weight.data.normal_(mean=0.0, std=0.02)
200
+ # if isinstance(module, nn.Linear) and module.bias is not None:
201
+ # module.bias.data.zero_()
202
+ # elif isinstance(module, nn.LayerNorm):
203
+ # module.bias.data.zero_()
204
+ # module.weight.data.fill_(1.0)
205
+ def init_optimizers(self,train_config):
206
+ optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
207
+ return optimizer
208
+ def init_scheduler(self,train_config):
209
+ scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
210
+ return scheduler
211
+ def get_collate_fn(self, vocab_encoder,vocab_decoder):
212
+ def collate(results):
213
+ X_Encoder = [a[0] for a in results]
214
+ X_Decoder = [a[1] for a in results]
215
+ boundary = -1
216
+ max_len_x = max([len(a) for a in X_Encoder])
217
+ max_len_y = max([len(a) for a in X_Decoder])
218
+ x = torch.tensor([(a+[vocab_encoder[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in X_Encoder],dtype=torch.long)
219
+ y = torch.tensor([(a+[vocab_decoder[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in X_Decoder],dtype=torch.long)
220
+ return x,y,boundary
221
+ return collate
222
+
223
+
224
+ def generate_masks(self,X_Encoder, X_Decoder):
225
+ # Generate encoder, decoder, cross masks
226
+ T = X_Decoder.shape[1]
227
+ Mask_Encoder = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).unsqueeze(-2)
228
+ Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).unsqueeze(-2).repeat(1,1,T,1)
229
+ Mask_Cross = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).unsqueeze(-2)
230
+ mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T).to(Mask_Decoder.device)
231
+ Mask_Decoder = Mask_Decoder.masked_fill(mask_tril==0,0)
232
+ return Mask_Encoder,Mask_Decoder,Mask_Cross
233
+
234
+ def forward(self, X_Encoder, X_Decoder, Y_Decoder_Ref=None,boundary=None):
235
+ Mask_Encoder, Mask_Decoder,Mask_Cross = self.generate_masks(X_Encoder, X_Decoder)
236
+ # preprocess
237
+ X_Encoder = self.Dropout1(self.Embedding_Encoder(X_Encoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Encoder.size(1)))
238
+ X_Decoder = self.Dropout2(self.Embedding_Decoder(X_Decoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Decoder.size(1)))
239
+ #### Now X_Encoder: BatchSize, SequenceLength, DimAttention
240
+ # Encoder blocks
241
+ for encoder_block in self.encoder_blocks:
242
+ X_Encoder = encoder_block(X_Encoder,Mask_Encoder)
243
+ X_Encoder = self.LayerNorm1(X_Encoder)
244
+ # Decoder blocks
245
+ for decoder_block in self.decoder_blocks:
246
+ X_Decoder = decoder_block(X_Encoder,X_Decoder,Mask_Cross,Mask_Decoder)
247
+ X_Decoder = self.LayerNorm2(X_Decoder)
248
+ Y_Decoder_Logits = self.head(X_Decoder)
249
+ loss = None
250
+ if Y_Decoder_Ref is not None:
251
+ loss = F.cross_entropy(Y_Decoder_Logits.view(-1, Y_Decoder_Logits.size(-1)), Y_Decoder_Ref.view(-1),ignore_index=self.Token_Padding_Decoder)
252
+ return Y_Decoder_Logits, loss
253
+
254
+
255
+
256
+
257
+
258
+
259
+
260
+
261
+
262
+
263
+
264
+
265
+ # def generate_masks(self,X_Encoder, X_Decoder):
266
+ # # Generate encoder, decoder, cross masks
267
+ # Mask_Encoder = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).int().cpu()
268
+ # Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).int().cpu()
269
+ # Mask_Cross = Mask_Decoder.unsqueeze(-1) @ Mask_Encoder.unsqueeze(-2)
270
+ # Mask_Encoder = Mask_Encoder.unsqueeze(-1) @ Mask_Encoder.unsqueeze(-2)
271
+ # Mask_Decoder = Mask_Decoder.unsqueeze(-1) @ Mask_Decoder.unsqueeze(-2)
272
+ # T = X_Decoder.shape[1]
273
+ # mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T)
274
+ # Mask_Decoder = Mask_Decoder.masked_fill(mask_tril==0,0)
275
+ # Mask_Encoder = Mask_Encoder.to(X_Encoder.device)
276
+ # Mask_Decoder = Mask_Decoder.to(X_Decoder.device)
277
+ # Mask_Cross = Mask_Cross.to(X_Encoder.device)
278
+ # return Mask_Encoder,Mask_Decoder,Mask_Cross
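generate_masks above combines a key-padding mask with a causal (lower-triangular) constraint for the decoder. A standalone numeric sketch of that combination for one 4-token sequence whose last position is padding (the pad id 0 is illustrative):

import torch

PAD = 0
X_dec = torch.tensor([[3, 5, 7, 0]])                # last position is padding
T = X_dec.shape[1]
mask = (X_dec != PAD).unsqueeze(-2).unsqueeze(-2)   # (B, 1, 1, T) key-padding mask
mask = mask.repeat(1, 1, T, 1)                      # broadcast over query positions
tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T)
mask = mask.masked_fill(tril == 0, False)           # add the causal constraint
print(mask[0, 0].int())
# tensor([[1, 0, 0, 0],
#         [1, 1, 0, 0],
#         [1, 1, 1, 0],
#         [1, 1, 1, 0]])  <- the padded key column stays masked in every row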
SCMG/models/Reinvent/sampler.py ADDED
@@ -0,0 +1,85 @@
1
+ import random
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.nn import functional as F
6
+
7
+ def set_seed(seed):
8
+ random.seed(seed)
9
+ np.random.seed(seed)
10
+ torch.manual_seed(seed)
11
+ torch.cuda.manual_seed_all(seed)
12
+
13
+ def top_k_logits(logits, k):
14
+ v, ix = torch.topk(logits, k)
15
+ out = logits.clone()
16
+ out[out < v[:, [-1]]] = -float('Inf')
17
+ return out
18
+
19
+ @torch.no_grad()
20
+ def sample(model, x, steps, temperature=1.0, sample=False, top_k=None):
21
+ block_size = model.get_block_size()
22
+ model.eval()
23
+ for k in range(steps):
24
+ x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
25
+ logits, _ = model(x_cond)
26
+ logits = logits[:, -1, :] / temperature
27
+ if top_k is not None:
28
+ logits = top_k_logits(logits, top_k)
29
+ probs = F.softmax(logits, dim=-1)
30
+ if sample:
31
+ ix = torch.multinomial(probs, num_samples=1)
32
+ else:
33
+ _, ix = torch.topk(probs, k=1, dim=-1)
34
+ x = torch.cat((x, ix), dim=1)
35
+
36
+ return x
37
+
38
+
39
+
40
+
41
+ @torch.no_grad()
42
+ def sample(model, x, steps, temperature=1.0, boundary=None):  # redefinition: this boundary-aware version shadows the sample() above
43
+ block_size = model.get_block_size()
44
+ model.eval()
45
+ for k in range(steps):
46
+ x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
47
+ logits, _ = model(x_cond,boundary=boundary)
48
+ logits = logits[:, -1, :] / temperature
49
+ probs = F.softmax(logits, dim=-1)
50
+ ix = torch.multinomial(probs, num_samples=1)
51
+ x = torch.cat((x, ix), dim=1)
52
+ return x
53
+
54
+ # example input left over from testing (otherwise a no-op expression): 'L_5*C(=O)NCc1cccc(OC)c1.*c1nsc2ccccc12COc1cccc(CNC(=O)c2cccc(NC(=O)c3nsc4ccccc34)c2)c1'
55
+
56
+ # for i in range(1,21):
57
+ def sample_L(i, option='string'):  # scratch helper; relies on notebook globals (vocab, inv, model, test_valid)
58
+ # i=2
59
+ prefix = 'L_'+str(i)
60
+ string_input = prefix + '*O=C1NN=Cc2c1cccc2.*O=C(C1CC1)N1CCNCC1'
61
+ array_input = [vocab[a] for a in ['<bos>'] + list(string_input)]
62
+ boundary = [len(array_input)]
63
+ tensor_input = torch.tensor(array_input,device='cuda').unsqueeze(0).repeat(32,1)
64
+ boundary = boundary*32
65
+ tensor_output = sample(model,tensor_input,250,boundary=boundary)
66
+ strings_output = []
67
+ for j in range(tensor_output.shape[0]):
68
+ list_string_output = [inv[a] for a in tensor_output[j,boundary[j]:].cpu().numpy() if a != vocab['<pad>']]
69
+ # if list_string_output[0] == '<bos>':
70
+ # list_string_output = list_string_output[1:]
71
+ if list_string_output[-1] == '<eos>':
72
+ list_string_output = list_string_output[:-1]
73
+ string_output = ''.join(list_string_output)
74
+ strings_output.append(string_output)
75
+ print(string_output)
76
+ for j in range(tensor_output.shape[0]):
77
+ if test_valid(strings_output[j]):
78
+ print(1)
79
+ else:
80
+ print(0)
81
+
82
+ # logits,_ = model(tensor_input,boundary=boundary)
83
+
84
+
85
+ # tokenised form of the test input above (otherwise a no-op expression): ['<bos>', 'L', '_', '5', '*', 'C', '(', '=', 'O', ')', 'N', 'C', 'c', '1', 'c', 'c', 'c', 'c', '(', 'O', 'C', ')', 'c', '1', '.', '*', 'c', '1', 'n', 's', 'c', '2', 'c', 'c', 'c', 'c', 'c', '1', '2', 'C', 'O', 'c', '1', 'c', 'c', 'c', 'c', '(', 'C', 'N', 'C', '(', '=', 'O', ')', 'c', '2', 'c', 'c', 'c', 'c', '(', 'N', 'C', '(', '=', 'O', ')', 'c', '3', 'n', 's', 'c', '4', 'c', 'c', 'c', 'c', 'c', '3', '4', ')', 'c', '2', ')', 'c', '1', '<eos>']
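For reference, top_k_logits at the top of this file keeps only the k largest logits and sends everything else to -inf, so the softmax redistributes all probability mass over the kept tokens. A quick numeric check with made-up logits:

import torch
from torch.nn import functional as F

logits = torch.tensor([[2.0, 0.5, 1.0, -1.0]])
v, _ = torch.topk(logits, k=2)
out = logits.clone()
out[out < v[:, [-1]]] = -float('Inf')   # keep the 2 largest, mask the rest
print(out)                              # tensor([[2., -inf, 1., -inf]])
print(F.softmax(out, dim=-1))           # probability mass only on the kept tokens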
SCMG/models/Reinvent_Scaffold_Decorator/__init__.py ADDED
File without changes
SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (196 Bytes). View file
 
SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/model copy 2.cpython-310.pyc ADDED
Binary file (14.4 kB). View file
 
SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/model copy.cpython-310.pyc ADDED
Binary file (8.41 kB). View file
 
SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/sampler.cpython-310.pyc ADDED
Binary file (3.19 kB). View file
 
SCMG/models/Reinvent_Scaffold_Decorator/model copy 2.py ADDED
@@ -0,0 +1,420 @@
1
+ import math
2
+ import logging
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ from torch.nn import functional as F
7
+
8
+ logger = logging.getLogger(__name__)
9
+ from SCMG.config import varables
10
+
11
+ # class ModelConfig():
12
+ # rate_dropout_embedding = 0.1
13
+ # rate_dropout_residue = 0.1
14
+ # rate_dropout_attention = 0.1
15
+ # block_size=125
16
+ # def __init__(self, size_vocab, **kwargs):
17
+ # self.size_vocab = size_vocab
18
+ # for k,v in kwargs.items():
19
+ # setattr(self, k, v)
20
+
21
+ class CausalSelfAttention(nn.Module):
22
+ def __init__(self, config):
23
+ super().__init__()
24
+ assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
25
+ self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
26
+ self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
27
+ self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
28
+ self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
29
+ self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
30
+ self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
31
+ self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
32
+ .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
33
+ self.n_head = config[varables.NUM_HEADS]
34
+ self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
35
+ self.attention_features = config[varables.DIM_ATTENTION]
36
+
37
+ def forward(self, x, layer_past=None):
38
+ B, T, C = x.size()
39
+ k = self.key(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
40
+ q = self.query(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
41
+ v = self.value(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
42
+ att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
43
+ att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf'))
44
+ att = F.softmax(att, dim=-1)
45
+ att = self.dropout_attention(att)
46
+ y = att @ v
47
+ y = y.transpose(1, 2).contiguous().view(B, T, self.attention_features)
48
+ y = self.dropout_residue(self.projection(y))
49
+ return y
50
+
51
+
52
+ class CrossAttention(nn.Module):
53
+ def __init__(self, config):
54
+ super().__init__()
55
+ assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
56
+ self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
57
+ self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
58
+ self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
59
+ self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
60
+ self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
61
+ self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
62
+ self.n_head = config[varables.NUM_HEADS]
63
+ self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
64
+ self.attention_features = config[varables.DIM_ATTENTION]
65
+ self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
66
+ .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
67
+
68
+ def forward(self, x_encoder,x_decoder, layer_past=None):
69
+ B_encoder, T_encoder, C_encoder = x_encoder.size()
70
+ B_decoder, T_decoder, C_decoder = x_decoder.size()
71
+ k = self.key( x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
72
+ q = self.query(x_decoder).view(B_encoder, T_decoder, self.n_head,self.single_head_dim).transpose(1, 2)
73
+ v = self.value(x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
74
+ att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
75
+ att = att.masked_fill(self.mask[:,:,:T_decoder,:T_encoder] == 0, float('-inf'))  # NOTE: reuses the causal tril mask for cross-attention; a source padding mask is more usual
76
+ att = F.softmax(att, dim=-1)
77
+ att = self.dropout_attention(att)
78
+ y = att @ v
79
+ y = y.transpose(1, 2).contiguous().view(B_encoder, T_decoder, self.attention_features)
80
+ y = self.dropout_residue(self.projection(y))
81
+ return y
82
+
83
+
84
+
85
+
86
+ class EncoderBlock(nn.Module):
87
+ def __init__(self, config):
88
+ super().__init__()
89
+ self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
90
+ self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
91
+ self.attn = CausalSelfAttention(config)
92
+ self.mlp = nn.Sequential(
93
+ nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
94
+ nn.GELU(),
95
+ nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
96
+ nn.Dropout(config[varables.RATE_DROPOUT]),
97
+ )
98
+
99
+ def forward(self, x):
100
+ # pre-norm residual: self-attention, then feed-forward MLP
101
+ x = x + self.attn(self.ln1(x))
102
+ x = x + self.mlp(self.ln2(x))
103
+ return x
104
+
105
+ class DecoderBlock(nn.Module):
106
+ def __init__(self, config):
107
+ super().__init__()
108
+ self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
109
+ self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
110
+ self.masked_attn = CausalSelfAttention(config)
111
+ self.cross_attn = CrossAttention(config)
112
+ self.mlp = nn.Sequential(
113
+ nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
114
+ nn.GELU(),
115
+ nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
116
+ nn.Dropout(config[varables.RATE_DROPOUT]),
117
+ )
118
+
119
+ def forward(self, x_encoder,x):
120
+ # pre-norm residual: masked self-attention, cross-attention, then feed-forward MLP
121
+ x = x + self.masked_attn(self.ln1(x))
122
+ x = x + self.cross_attn(x_encoder,self.ln1(x))
123
+ x = x + self.mlp(self.ln2(x))
124
+ return x
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+ import torch
143
+ import torch.nn as nn
144
+ import torch.nn.functional as F
145
+ import math
146
+
147
+
148
+ class Norm(nn.Module):
149
+ def __init__(self, d_model, eps = 1e-6):
150
+ super().__init__()
151
+
152
+ self.size = d_model
153
+
154
+ # create two learnable parameters to calibrate normalisation
155
+ self.alpha = nn.Parameter(torch.ones(self.size))
156
+ self.bias = nn.Parameter(torch.zeros(self.size))
157
+
158
+ self.eps = eps
159
+
160
+ def forward(self, x):
161
+ norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) \
162
+ / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
163
+ return norm
164
+
165
+ def attention(q, k, v, d_k, mask=None, dropout=None):
166
+
167
+ scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
168
+
169
+ if mask is not None:
170
+ mask = mask.unsqueeze(1)
171
+ scores = scores.masked_fill(mask == 0, -1e9)
172
+
173
+ scores = F.softmax(scores, dim=-1)
174
+
175
+ if dropout is not None:
176
+ scores = dropout(scores)
177
+
178
+ output = torch.matmul(scores, v)
179
+ return output
180
+
181
+ class MultiHeadAttention(nn.Module):
182
+ def __init__(self, heads, d_model, dropout = 0.1):
183
+ super().__init__()
184
+
185
+ self.d_model = d_model
186
+ self.d_k = d_model // heads
187
+ self.h = heads
188
+
189
+ self.q_linear = nn.Linear(d_model, d_model)
190
+ self.v_linear = nn.Linear(d_model, d_model)
191
+ self.k_linear = nn.Linear(d_model, d_model)
192
+
193
+ self.dropout = nn.Dropout(dropout)
194
+ self.out = nn.Linear(d_model, d_model)
195
+
196
+ def forward(self, q, k, v, mask=None):
197
+
198
+ bs = q.size(0)
199
+
200
+ # perform linear operation and split into N heads
201
+ k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
202
+ q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
203
+ v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
204
+
205
+ # transpose to get dimensions bs * N * sl * d_model
206
+ k = k.transpose(1,2)
207
+ q = q.transpose(1,2)
208
+ v = v.transpose(1,2)
209
+
210
+
211
+ # calculate attention using function we will define next
212
+ scores = attention(q, k, v, self.d_k, mask, self.dropout)
213
+ # concatenate heads and put through final linear layer
214
+ concat = scores.transpose(1,2).contiguous()\
215
+ .view(bs, -1, self.d_model)
216
+ output = self.out(concat)
217
+
218
+ return output
219
+
220
+ class FeedForward(nn.Module):
221
+ def __init__(self, d_model, d_ff=2048, dropout = 0.1):
222
+ super().__init__()
223
+
224
+ # We set d_ff as a default to 2048
225
+ self.linear_1 = nn.Linear(d_model, d_ff)
226
+ self.dropout = nn.Dropout(dropout)
227
+ self.linear_2 = nn.Linear(d_ff, d_model)
228
+
229
+ def forward(self, x):
230
+ x = self.dropout(F.relu(self.linear_1(x)))
231
+ x = self.linear_2(x)
232
+ return x
233
+
234
+
235
+
236
+
237
+ import torch
238
+ import torch.nn as nn
239
+ import copy
240
+
241
+
242
+ class EncoderLayer(nn.Module):
243
+ def __init__(self, d_model, heads, dropout=0.1):
244
+ super().__init__()
245
+ self.norm_1 = Norm(d_model)
246
+ self.norm_2 = Norm(d_model)
247
+ self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
248
+ self.ff = FeedForward(d_model, dropout=dropout)
249
+ self.dropout_1 = nn.Dropout(dropout)
250
+ self.dropout_2 = nn.Dropout(dropout)
251
+
252
+ def forward(self, x, mask):
253
+ x2 = self.norm_1(x)
254
+ x = x + self.dropout_1(self.attn(x2,x2,x2,mask))
255
+ x2 = self.norm_2(x)
256
+ x = x + self.dropout_2(self.ff(x2))
257
+ return x
258
+
259
+ # build a decoder layer with two multi-head attention layers and
260
+ # one feed-forward layer
261
+ class DecoderLayer(nn.Module):
262
+ def __init__(self, d_model, heads, dropout=0.1):
263
+ super().__init__()
264
+ self.norm_1 = Norm(d_model)
265
+ self.norm_2 = Norm(d_model)
266
+ self.norm_3 = Norm(d_model)
267
+
268
+ self.dropout_1 = nn.Dropout(dropout)
269
+ self.dropout_2 = nn.Dropout(dropout)
270
+ self.dropout_3 = nn.Dropout(dropout)
271
+
272
+ self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
273
+ self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
274
+ self.ff = FeedForward(d_model, dropout=dropout)
275
+
276
+ def forward(self, x, e_outputs, src_mask, trg_mask):
277
+ x2 = self.norm_1(x)
278
+ x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
279
+ x2 = self.norm_2(x)
280
+ x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, \
281
+ src_mask))
282
+ x2 = self.norm_3(x)
283
+ x = x + self.dropout_3(self.ff(x2))
284
+ return x
285
+
286
+
287
+ import torch
288
+ import torch.nn as nn
289
+ import math
290
+ from torch.autograd import Variable
291
+
292
+ class Embedder(nn.Module):
293
+ def __init__(self, vocab_size, d_model):
294
+ super().__init__()
295
+ self.d_model = d_model
296
+ self.embed = nn.Embedding(vocab_size, d_model)
297
+ def forward(self, x):
298
+ return self.embed(x)
299
+
300
+ class PositionalEncoder(nn.Module):
301
+ def __init__(self, d_model, max_seq_len = 200, dropout = 0.1):
302
+ super().__init__()
303
+ self.d_model = d_model
304
+ self.dropout = nn.Dropout(dropout)
305
+ # create constant 'pe' matrix with values dependent on
306
+ # pos and i
307
+ pe = torch.zeros(max_seq_len, d_model)
308
+ for pos in range(max_seq_len):
309
+ for i in range(0, d_model, 2):
310
+ pe[pos, i] = \
311
+ math.sin(pos / (10000 ** ((2 * i)/d_model)))
312
+ pe[pos, i + 1] = \
313
+ math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
314
+ pe = pe.unsqueeze(0)
315
+ self.register_buffer('pe', pe)
316
+
317
+
318
+ def forward(self, x):
319
+ # make embeddings relatively larger
320
+ x = x * math.sqrt(self.d_model)
321
+ # add the constant positional encoding to the embedding
322
+ seq_len = x.size(1)
323
+ pe = Variable(self.pe[:,:seq_len], requires_grad=False)
324
+ if x.is_cuda:
325
+ pe = pe.cuda()  # .cuda() is not in-place; rebind so the addition below runs on the GPU
326
+ x = x + pe
327
+ return self.dropout(x)
328
+
329
+ def get_clones(module, N):
330
+ return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
331
+
332
+ class Encoder(nn.Module):
333
+ def __init__(self, vocab_size, d_model, N, heads, dropout):
334
+ super().__init__()
335
+ self.N = N
336
+ self.embed = Embedder(vocab_size, d_model)
337
+ self.pe = PositionalEncoder(d_model, dropout=dropout)
338
+ self.layers = get_clones(EncoderLayer(d_model, heads, dropout), N)
339
+ self.norm = Norm(d_model)
340
+ def forward(self, src, mask):
341
+ x = self.embed(src)
342
+ x = self.pe(x)
343
+ for i in range(self.N):
344
+ x = self.layers[i](x, mask)
345
+ return self.norm(x)
346
+
347
+ class Decoder(nn.Module):
348
+ def __init__(self, vocab_size, d_model, N, heads, dropout):
349
+ super().__init__()
350
+ self.N = N
351
+ self.embed = Embedder(vocab_size, d_model)
352
+ self.pe = PositionalEncoder(d_model, dropout=dropout)
353
+ self.layers = get_clones(DecoderLayer(d_model, heads, dropout), N)
354
+ self.norm = Norm(d_model)
355
+ def forward(self, trg, e_outputs, src_mask, trg_mask):
356
+ x = self.embed(trg)
357
+ x = self.pe(x)
358
+ for i in range(self.N):
359
+ x = self.layers[i](x, e_outputs, src_mask, trg_mask)
360
+ return self.norm(x)
361
+
362
+ class Model(nn.Module):
363
+ def __init__(self, config):
364
+ super().__init__()
365
+ self.encoder = Encoder(len(config["vocab_encoder"]), config[varables.DIM_ATTENTION], config[varables.NUM_LAYERS], config[varables.NUM_HEADS], config[varables.RATE_DROPOUT])
366
+ self.decoder = Decoder(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION], config[varables.NUM_LAYERS], config[varables.NUM_HEADS], config[varables.RATE_DROPOUT])
367
+ self.out = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]))
368
+ # self.tok_emb = nn.Embedding(config[varables.SIZE_VOCAB], config[varables.DIM_EMBEDDING])
369
+ # self.pos_emb = nn.Parameter(torch.zeros(1, config[varables.SIZE_BLOCK], config[varables.DIM_EMBEDDING]))
370
+ # self.drop = nn.Dropout(config[varables.RATE_DROPOUT])
371
+ # self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
372
+ # self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
373
+ # self.blocks = nn.Sequential(*[DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
374
+ # self.ln_f = nn.LayerNorm(config[varables.DIM_EMBEDDING])
375
+ # self.head = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.SIZE_VOCAB], bias=False)
376
+ self.block_size = config[varables.SIZE_BLOCK]  # restored: get_block_size() below depends on it
377
+ # self.apply(self._init_weights)
378
+ # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
379
+ self.optimizer = None
380
+
381
+ def get_block_size(self):
382
+ return self.block_size
383
+
384
+ def _init_weights(self, module):
385
+ if isinstance(module, (nn.Linear, nn.Embedding)):
386
+ module.weight.data.normal_(mean=0.0, std=0.02)
387
+ if isinstance(module, nn.Linear) and module.bias is not None:
388
+ module.bias.data.zero_()
389
+ elif isinstance(module, nn.LayerNorm):
390
+ module.bias.data.zero_()
391
+ module.weight.data.fill_(1.0)
392
+ def init_optimizers(self,train_config):
393
+ optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
394
+ return optimizer
395
+ def init_scheduler(self,train_config):
396
+ scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
397
+ return scheduler
398
+ def get_collate_fn(self, vocab_encoder,vocab_decoder):
399
+ def collate(results):
400
+ x_in = [a[0] for a in results]
401
+ y_in = [a[1] for a in results]
402
+ boundary = -1
403
+ max_len_x = max([len(a) for a in x_in])
404
+ max_len_y = max([len(a) for a in y_in])
405
+ x = torch.tensor([(a+[vocab_encoder[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in x_in],dtype=torch.long)
406
+ y = torch.tensor([(a+[vocab_decoder[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in y_in],dtype=torch.long)
407
+ return x,y,boundary
408
+ return collate
409
+ def forward(self, src, trg, trg_out, boundary=None):
410
+ src_mask = None
411
+ trg_mask = torch.tril(torch.ones(trg.shape[1], trg.shape[1], device=trg.device)).view(1, trg.shape[1], trg.shape[1])  # 3-D: attention() adds the head dim itself via unsqueeze(1)
412
+ e_outputs = self.encoder(src, src_mask)
413
+ d_output = self.decoder(trg, e_outputs, src_mask, trg_mask)
414
+ logits = self.out(d_output)
415
+ loss = None
416
+ if trg_out is not None:
417
+ loss = F.cross_entropy(logits.view(-1, logits.size(-1)), trg_out.view(-1))
418
+ return logits, loss
419
+
420
+ # mark test
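This commit now carries two different sinusoidal PositionalEncoder constructions: the vectorized one in SCMG/models/Reinvent/model.py and the tutorial-style double loop pasted above, whose 2*i exponent (with i already stepping by 2) produces a different frequency schedule. A small standalone comparison at d_model=8, max_len=4 (illustrative sizes):

import math
import torch

d_model, max_len = 8, 4

# Vectorized form (as in SCMG/models/Reinvent/model.py): the standard sinusoid.
pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe_vec = torch.zeros(max_len, d_model)
pe_vec[:, 0::2] = torch.sin(pos * div)
pe_vec[:, 1::2] = torch.cos(pos * div)

# Loop form (as in the PositionalEncoder above): note the 2*i in the exponent.
pe_loop = torch.zeros(max_len, d_model)
for p in range(max_len):
    for i in range(0, d_model, 2):
        pe_loop[p, i] = math.sin(p / (10000 ** ((2 * i) / d_model)))
        pe_loop[p, i + 1] = math.cos(p / (10000 ** ((2 * (i + 1)) / d_model)))

print(torch.allclose(pe_vec, pe_loop))   # False: the frequency schedules differ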
SCMG/models/Reinvent_Scaffold_Decorator/model copy.py ADDED
@@ -0,0 +1,187 @@
1
+ import math
2
+ import logging
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ from torch.nn import functional as F
7
+
8
+ logger = logging.getLogger(__name__)
9
+ from SCMG.config import varables
10
+
11
+ # class ModelConfig():
12
+ # rate_dropout_embedding = 0.1
13
+ # rate_dropout_residue = 0.1
14
+ # rate_dropout_attention = 0.1
15
+ # block_size=125
16
+ # def __init__(self, size_vocab, **kwargs):
17
+ # self.size_vocab = size_vocab
18
+ # for k,v in kwargs.items():
19
+ # setattr(self, k, v)
20
+
21
+ class CausalSelfAttention(nn.Module):
22
+ def __init__(self, config):
23
+ super().__init__()
24
+ assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
25
+ self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
26
+ self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
27
+ self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
28
+ self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
29
+ self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
30
+ self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
31
+ self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
32
+ .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
33
+ self.n_head = config[varables.NUM_HEADS]
34
+ self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
35
+ self.attention_features = config[varables.DIM_ATTENTION]
36
+
37
+ def forward(self, x, layer_past=None):
38
+ B, T, C = x.size()
39
+ k = self.key(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
40
+ q = self.query(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
41
+ v = self.value(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
42
+ att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
43
+ att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf'))
44
+ att = F.softmax(att, dim=-1)
45
+ att = self.dropout_attention(att)
46
+ y = att @ v
47
+ y = y.transpose(1, 2).contiguous().view(B, T, self.attention_features)
48
+ y = self.dropout_residue(self.projection(y))
49
+ return y
50
+
51
+
52
+ class CrossAttention(nn.Module):
53
+ def __init__(self, config):
54
+ super().__init__()
55
+ assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
56
+ self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
57
+ self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
58
+ self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
59
+ self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
60
+ self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
61
+ self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
62
+ self.n_head = config[varables.NUM_HEADS]
63
+ self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
64
+ self.attention_features = config[varables.DIM_ATTENTION]
65
+ self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
66
+ .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
67
+
68
+ def forward(self, x_encoder,x_decoder, layer_past=None):
69
+ B_encoder, T_encoder, C_encoder = x_encoder.size()
70
+ B_decoder, T_decoder, C_decoder = x_decoder.size()
71
+ k = self.key( x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
72
+ q = self.query(x_decoder).view(B_encoder, T_decoder, self.n_head,self.single_head_dim).transpose(1, 2)
73
+ v = self.value(x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
74
+ att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
75
+ att = att.masked_fill(self.mask[:,:,:T_decoder,:T_encoder] == 0, float('-inf'))  # NOTE: reuses the causal tril mask for cross-attention; a source padding mask is more usual
76
+ att = F.softmax(att, dim=-1)
77
+ att = self.dropout_attention(att)
78
+ y = att @ v
79
+ y = y.transpose(1, 2).contiguous().view(B_encoder, T_decoder, self.attention_features)
80
+ y = self.dropout_residue(self.projection(y))
81
+ return y
82
+
83
+
84
+
85
+
86
+ class EncoderBlock(nn.Module):
87
+ def __init__(self, config):
88
+ super().__init__()
89
+ self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
90
+ self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
91
+ self.attn = CausalSelfAttention(config)
92
+ self.mlp = nn.Sequential(
93
+ nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
94
+ nn.GELU(),
95
+ nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
96
+ nn.Dropout(config[varables.RATE_DROPOUT]),
97
+ )
98
+
99
+ def forward(self, x):
100
+ # = y_input
101
+ x = x + self.attn(self.ln1(x))
102
+ x = x + self.mlp(self.ln2(x))
103
+ return x
104
+
105
+ class DecoderBlock(nn.Module):
106
+ def __init__(self, config):
107
+ super().__init__()
108
+ self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
109
+ self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
110
+ self.masked_attn = CausalSelfAttention(config)
111
+ self.cross_attn = CrossAttention(config)
112
+ self.mlp = nn.Sequential(
113
+ nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
114
+ nn.GELU(),
115
+ nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
116
+ nn.Dropout(config[varables.RATE_DROPOUT]),
117
+ )
118
+
119
+ def forward(self, x_encoder,x):
120
+ # = y_input
121
+ x = x + self.masked_attn(self.ln1(x))
122
+ x = x + self.cross_attn(x_encoder,self.ln1(x))
123
+ x = x + self.mlp(self.ln2(x))
124
+ return x
125
+
126
+ class Model(nn.Module):
127
+ def __init__(self, config):
128
+ super().__init__()
129
+ self.tok_emb = nn.Embedding(config[varables.SIZE_VOCAB], config[varables.DIM_EMBEDDING])
130
+ self.pos_emb = nn.Parameter(torch.zeros(1, config[varables.SIZE_BLOCK], config[varables.DIM_EMBEDDING]))
131
+ self.drop = nn.Dropout(config[varables.RATE_DROPOUT])
132
+ self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
133
+ self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
134
+ # self.blocks = nn.Sequential(*[DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
135
+ self.ln_f = nn.LayerNorm(config[varables.DIM_EMBEDDING])
136
+ self.head = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.SIZE_VOCAB], bias=False)
137
+ self.block_size = config[varables.SIZE_BLOCK]
138
+ self.apply(self._init_weights)
139
+ logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
140
+ self.optimizer = None
141
+
142
+ def get_block_size(self):
143
+ return self.block_size
144
+
145
+ def _init_weights(self, module):
146
+ if isinstance(module, (nn.Linear, nn.Embedding)):
147
+ module.weight.data.normal_(mean=0.0, std=0.02)
148
+ if isinstance(module, nn.Linear) and module.bias is not None:
149
+ module.bias.data.zero_()
150
+ elif isinstance(module, nn.LayerNorm):
151
+ module.bias.data.zero_()
152
+ module.weight.data.fill_(1.0)
153
+ def init_optimizers(self,train_config):
154
+ optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
155
+ return optimizer
156
+ def init_scheduler(self,train_config):
157
+ scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
158
+ return scheduler
159
+ def get_collate_fn(self, vocab):
160
+ def collate(results):
161
+ x_in = [a[0] for a in results]
162
+ y_in = [a[1] for a in results]
163
+ boundary = -1
164
+ max_len_x = max([len(a) for a in x_in])
165
+ max_len_y = max([len(a) for a in y_in])
166
+ x = torch.tensor([(a+[vocab[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in x_in],dtype=torch.long)
167
+ y = torch.tensor([(a+[vocab[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in y_in],dtype=torch.long)
168
+ return x,y,boundary
169
+ return collate
170
+
171
+ def forward(self, x_in, y_in, y_out=None,boundary=None):
172
+ x_in = self.drop(self.tok_emb(x_in) + self.pos_emb[:, :x_in.size()[1], :])
173
+ y_in = self.drop(self.tok_emb(y_in) + self.pos_emb[:, :y_in.size()[1], :])
174
+ #
175
+ for encoder_block in self.encoder_blocks:
176
+ x_in = encoder_block(x_in)
177
+ x_in = self.ln_f(x_in)
178
+ for decoder_block in self.decoder_blocks:
179
+ y_in = decoder_block(x_in,y_in)
180
+ y_in = self.ln_f(y_in)
181
+ logits = self.head(y_in)
182
+ loss = None
183
+ if y_out is not None:
184
+ loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y_out.view(-1))
185
+ return logits, loss
186
+
187
+ # mark test
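
A minimal smoke test for the encoder-decoder Model above might look as follows. This is an illustrative sketch, not part of the commit: it assumes the module-level imports at the top of the file (torch, nn, F, math, varables, logger) are in scope, treats the varables constants as ordinary config keys, and uses made-up dimensions and a toy vocabulary.

import torch

config = {
    varables.SIZE_VOCAB: 64,
    varables.DIM_EMBEDDING: 128,
    varables.DIM_ATTENTION: 128,
    varables.DIM_FEEDFORWARD: 512,
    varables.NUM_HEADS: 8,
    varables.NUM_LAYERS: 2,
    varables.SIZE_BLOCK: 256,
    varables.RATE_DROPOUT: 0.1,
}
model = Model(config)
x_in = torch.randint(0, 64, (4, 20))   # encoder tokens (batch, seq)
y_in = torch.randint(0, 64, (4, 15))   # decoder input tokens
y_out = torch.randint(0, 64, (4, 15))  # shifted decoder targets
logits, loss = model(x_in, y_in, y_out)
print(logits.shape, loss.item())       # torch.Size([4, 15, 64])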
SCMG/models/Reinvent_Scaffold_Decorator/model.py ADDED
@@ -0,0 +1,276 @@
+ # Reference implementation copied from undeadpixel/reinvent-scaffold-decorator,
+ # models/model.py (commit 37d0a8a by Josep Arús-Pous, "updated to revised version", May 8, 2020).
+ """
35
+ Model class.
36
+ """
37
+
38
+ import torch
39
+ import torch.nn as tnn
40
+
41
+ import models.decorator as mdec
42
+
43
+
44
+ class DecoratorModel:
45
+
46
+ def __init__(self, vocabulary, decorator, max_sequence_length=256, no_cuda=False, mode="train"):
47
+ """
48
+ Implements the likelihood and sampling functions of the decorator model.
49
+ :param vocabulary: A DecoratorVocabulary instance with the vocabularies of both the encoder and decoder.
50
+ :param network_params: A dict with parameters for the encoder and decoder networks.
51
+ :param decorator: An decorator network instance.
52
+ :param max_sequence_length: Maximium number of tokens allowed to sample.
53
+ :param no_cuda: Forces the model not to use CUDA, even if it is available.
54
+ :param mode: Mode in which the model should be initialized.
55
+ :return:
56
+ """
57
+ self.vocabulary = vocabulary
58
+ self.max_sequence_length = max_sequence_length
59
+ self.network = decorator
60
+
61
+ if torch.cuda.is_available() and not no_cuda:
62
+ self.network.cuda()
63
+
64
+ self._nll_loss = tnn.NLLLoss(reduction="none", ignore_index=0)
65
+ self.set_mode(mode)
66
+
67
+ @classmethod
68
+ def load_from_file(cls, path, mode="train"):
69
+ """
70
+ Loads a model from a single file
71
+ :param path: Path to the saved model.
72
+ :param mode: Mode in which the model should be initialized.
73
+ :return: An instance of the RNN.
74
+ """
75
+ data = torch.load(path)
76
+
77
+ decorator = mdec.Decorator(**data["decorator"]["params"])
78
+ decorator.load_state_dict(data["decorator"]["state"])
79
+
80
+ model = DecoratorModel(
81
+ decorator=decorator,
82
+ mode=mode,
83
+ **data["model"]
84
+ )
85
+
86
+ return model
87
+
88
+ def save(self, path):
89
+ """
90
+ Saves the model to a file.
91
+ :param path: Path to the file which the model will be saved to.
92
+ """
93
+ save_dict = {
94
+ 'model': {
95
+ 'vocabulary': self.vocabulary,
96
+ 'max_sequence_length': self.max_sequence_length
97
+ },
98
+ 'decorator': {
99
+ 'params': self.network.get_params(),
100
+ 'state': self.network.state_dict()
101
+ }
102
+ }
103
+ torch.save(save_dict, path)
104
+
105
+ def set_mode(self, mode):
106
+ """
107
+ Changes the mode of the RNN to training or eval.
108
+ :param mode: Mode to change to (training, eval)
109
+ :return: The model instance.
110
+ """
111
+ if mode == "sampling" or mode == "eval":
112
+ self.network.eval()
113
+ else:
114
+ self.network.train()
115
+ return self
116
+
117
+ def likelihood(self, scaffold_seqs, scaffold_seq_lengths, decoration_seqs, decoration_seq_lengths, with_attention_weights=False):
118
+ """
119
+ Retrieves the likelihood of a scaffold and its respective decorations.
120
+ :param scaffold_seqs: (batch, seq) A batch of padded scaffold sequences.
121
+ :param scaffold_seq_lengths: The length of the scaffold sequences (for packing purposes).
122
+ :param decoration_seqs: (batch, seq) A batch of decorator sequences.
123
+ :param decoration_seq_lengths: The length of the decorator sequences (for packing purposes).
124
+ :return: (batch) Log likelihood for each item in the batch.
125
+ """
126
+
127
+ # NOTE: the decoration_seq_lengths have a - 1 to prevent the end token to be forward-passed.
128
+ logits, attention_weights = self.network(scaffold_seqs, scaffold_seq_lengths, decoration_seqs,
129
+ decoration_seq_lengths - 1) # (batch, seq - 1, voc)
130
+ log_probs = logits.log_softmax(dim=2).transpose(1, 2) # (batch, voc, seq - 1)
131
+
132
+ logits = self._nll_loss(log_probs, decoration_seqs[:, 1:]).sum(dim=1) # (batch)
133
+ if with_attention_weights:
134
+ return logits, attention_weights
135
+ else:
136
+ return logits
137
+
138
+ @torch.no_grad()
139
+ def sample_decorations(self, scaffold_seqs, scaffold_seq_lengths):
140
+ """
141
+ Samples as many decorations as scaffolds in the tensor.
142
+ :param scaffold_seqs: A tensor with the scaffolds to sample already encoded and padded.
143
+ :param scaffold_seq_lengths: A tensor with the length of the scaffolds.
144
+ :return: An iterator with (scaffold_smi, decoration_smi, nll) triplets.
145
+ """
146
+ batch_size = scaffold_seqs.size(0)
147
+ input_vector = torch.full(
148
+ (batch_size, 1), self.vocabulary.decoration_vocabulary["^"], dtype=torch.long).cuda() # (batch, 1)
149
+ seq_lengths = torch.ones(batch_size) # (batch)
150
+ encoder_padded_seqs, hidden_states = self.network.forward_encoder(scaffold_seqs, scaffold_seq_lengths)
151
+ nlls = torch.zeros(batch_size).cuda()
152
+ not_finished = torch.ones(batch_size, 1, dtype=torch.long).cuda()
153
+ sequences = []
154
+ for _ in range(self.max_sequence_length - 1):
155
+ logits, hidden_states, _ = self.network.forward_decoder(
156
+ input_vector, seq_lengths, encoder_padded_seqs, hidden_states) # (batch, 1, voc)
157
+ probs = logits.softmax(dim=2).squeeze() # (batch, voc)
158
+ log_probs = logits.log_softmax(dim=2).squeeze() # (batch, voc)
159
+ input_vector = torch.multinomial(probs, 1)*not_finished # (batch, 1)
160
+ sequences.append(input_vector)
161
+ nlls += self._nll_loss(log_probs, input_vector.squeeze())
162
+ not_finished = (input_vector > 1).type(torch.long) # 0 is padding, 1 is end token
163
+ if not_finished.sum() == 0:
164
+ break
165
+
166
+ decoration_smiles = [self.vocabulary.decode_decoration(seq)
167
+ for seq in torch.cat(sequences, 1).data.cpu().numpy()]
168
+ scaffold_smiles = [self.vocabulary.decode_scaffold(seq) for seq in scaffold_seqs.data.cpu().numpy()]
169
+ return zip(scaffold_smiles, decoration_smiles, nlls.data.cpu().numpy().tolist())
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
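+ # Illustrative sketch (added for clarity, not upstream code; shapes are made up) of how
+ # likelihood() above assembles the per-sequence NLL from the decoder logits:
+ if __name__ == "__main__":
+     _logits = torch.randn(2, 7, 30)                           # (batch, seq - 1, voc)
+     _targets = torch.randint(1, 30, (2, 7))                   # stand-in for decoration_seqs[:, 1:]
+     _log_probs = _logits.log_softmax(dim=2).transpose(1, 2)   # (batch, voc, seq - 1)
+     _nlls = tnn.NLLLoss(reduction="none", ignore_index=0)(_log_probs, _targets).sum(dim=1)
+     print(_nlls.shape)                                        # torch.Size([2]): one NLL per sequence
+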
+ class Model(nn.Module):
+     # NOTE: this class assumes that nn, math, F, varables, PositionalEncoder, and the
+     # mask-aware EncoderBlock/DecoderBlock variants are defined or imported elsewhere.
+     def __init__(self, config):
+         super().__init__()
+         # Config variables
+         self.Dim_Attention = config[varables.DIM_ATTENTION]
+         self.Token_Padding_Encoder = config["Token_Padding_Encoder"]
+         self.Token_Padding_Decoder = config["Token_Padding_Decoder"]
+         # Embedding and positional encoding layers
+         self.Embedding_Encoder = nn.Embedding(len(config["vocab_encoder"]), config[varables.DIM_ATTENTION])
+         self.Embedding_Decoder = nn.Embedding(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION])
+         self.pos_emb = PositionalEncoder(config)
+         # Dropout and normalization layers
+         self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
+         self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         self.LayerNorm2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+         # Transformer layers
+         self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+         self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+         # Output layer
+         self.head = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]), bias=False)
+         # Init
+         self.apply(self._init_weights)
+         self.optimizer = None
+         # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+
+     def _init_weights(self, module):
+         # Xavier initialization for every parameter tensor with more than one dimension.
+         for p in module.parameters():
+             if p.dim() > 1:
+                 nn.init.xavier_uniform_(p)
+         # if isinstance(module, (nn.Linear, nn.Embedding)):
+         #     module.weight.data.normal_(mean=0.0, std=0.02)
+         #     if isinstance(module, nn.Linear) and module.bias is not None:
+         #         module.bias.data.zero_()
+         # elif isinstance(module, nn.LayerNorm):
+         #     module.bias.data.zero_()
+         #     module.weight.data.fill_(1.0)
+
+     def init_optimizers(self, train_config):
+         optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+         return optimizer
+
+     def init_scheduler(self, train_config):
+         scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+         return scheduler
+
+     def get_collate_fn(self, vocab_encoder, vocab_decoder):
+         def collate(results):
+             X_Encoder = [a[0] for a in results]
+             X_Decoder = [a[1] for a in results]
+             boundary = -1
+             max_len_x = max(len(a) for a in X_Encoder)
+             max_len_y = max(len(a) for a in X_Decoder)
+             x = torch.tensor([a + [vocab_encoder[varables.TOKEN_PAD]] * (max_len_x - len(a)) for a in X_Encoder], dtype=torch.long)
+             y = torch.tensor([a + [vocab_decoder[varables.TOKEN_PAD]] * (max_len_y - len(a)) for a in X_Decoder], dtype=torch.long)
+             return x, y, boundary
+         return collate
+
+     def generate_masks(self, X_Encoder, X_Decoder):
+         # Generate encoder, decoder and cross-attention masks from the padding tokens.
+         T = X_Decoder.shape[1]
+         Mask_Encoder = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).unsqueeze(-2)
+         Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).unsqueeze(-2).repeat(1, 1, T, 1)
+         Mask_Cross = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).unsqueeze(-2)
+         # Combine the decoder padding mask with a lower-triangular causal mask.
+         mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T).to(Mask_Decoder.device)
+         Mask_Decoder = Mask_Decoder.masked_fill(mask_tril == 0, 0)
+         return Mask_Encoder, Mask_Decoder, Mask_Cross
+
+     def forward(self, X_Encoder, X_Decoder, Y_Decoder_Ref=None, boundary=None):
+         Mask_Encoder, Mask_Decoder, Mask_Cross = self.generate_masks(X_Encoder, X_Decoder)
+         # Preprocess: scaled embeddings plus positional encodings.
+         X_Encoder = self.Dropout1(self.Embedding_Encoder(X_Encoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Encoder.size(1)))
+         X_Decoder = self.Dropout2(self.Embedding_Decoder(X_Decoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Decoder.size(1)))
+         # Now X_Encoder: (BatchSize, SequenceLength, DimAttention)
+         # Encoder blocks
+         for encoder_block in self.encoder_blocks:
+             X_Encoder = encoder_block(X_Encoder, Mask_Encoder)
+         X_Encoder = self.LayerNorm1(X_Encoder)
+         # Decoder blocks
+         for decoder_block in self.decoder_blocks:
+             X_Decoder = decoder_block(X_Encoder, X_Decoder, Mask_Cross, Mask_Decoder)
+         X_Decoder = self.LayerNorm2(X_Decoder)
+         Y_Decoder_Logits = self.head(X_Decoder)
+         loss = None
+         if Y_Decoder_Ref is not None:
+             loss = F.cross_entropy(Y_Decoder_Logits.view(-1, Y_Decoder_Logits.size(-1)), Y_Decoder_Ref.view(-1), ignore_index=self.Token_Padding_Decoder)
+         return Y_Decoder_Logits, loss
+
+     # def generate_masks(self, X_Encoder, X_Decoder):
+     #     # Generate encoder, decoder, cross masks
+     #     Mask_Encoder = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).int().cpu()
+     #     Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).int().cpu()
+     #     Mask_Cross = Mask_Decoder.unsqueeze(-1) @ Mask_Encoder.unsqueeze(-2)
+     #     Mask_Encoder = Mask_Encoder.unsqueeze(-1) @ Mask_Encoder.unsqueeze(-2)
+     #     Mask_Decoder = Mask_Decoder.unsqueeze(-1) @ Mask_Decoder.unsqueeze(-2)
+     #     T = X_Decoder.shape[1]
+     #     mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T)
+     #     Mask_Decoder = Mask_Decoder.masked_fill(mask_tril == 0, 0)
+     #     Mask_Encoder = Mask_Encoder.to(X_Encoder.device)
+     #     Mask_Decoder = Mask_Decoder.to(X_Decoder.device)
+     #     Mask_Cross = Mask_Cross.to(X_Encoder.device)
+     #     return Mask_Encoder, Mask_Decoder, Mask_Cross
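
To make the mask construction in generate_masks concrete, here is a small self-contained sketch (illustrative values only; pad id 0 is an assumption) of the combined padding-plus-causal decoder mask it produces:

import torch

X_Decoder = torch.tensor([[5, 6, 7, 0],
                          [5, 6, 0, 0]])   # batch of 2, pad id 0
T = X_Decoder.shape[1]
pad = (X_Decoder != 0).unsqueeze(-2).unsqueeze(-2).repeat(1, 1, T, 1)  # (2, 1, 4, 4)
tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T)
Mask_Decoder = pad.masked_fill(tril == 0, 0)
print(Mask_Decoder[0, 0].int())
# tensor([[1, 0, 0, 0],
#         [1, 1, 0, 0],
#         [1, 1, 1, 0],
#         [1, 1, 1, 0]])  # row i: positions token i may attend to; the padded column stays 0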
SCMG/models/Reinvent_Scaffold_Decorator/sampler.py ADDED
@@ -0,0 +1,85 @@
+ import random
+
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+
+ def set_seed(seed):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+
+ def top_k_logits(logits, k):
+     # Keep the k largest logits per row; set everything else to -inf.
+     v, ix = torch.topk(logits, k)
+     out = logits.clone()
+     out[out < v[:, [-1]]] = -float('Inf')
+     return out
+
+
+ @torch.no_grad()
+ def sample(model, x, steps, temperature=1.0, sample=False, top_k=None):
+     block_size = model.get_block_size()
+     model.eval()
+     for k in range(steps):
+         x_cond = x if x.size(1) <= block_size else x[:, -block_size:]  # crop context if needed
+         logits, _ = model(x_cond)
+         logits = logits[:, -1, :] / temperature
+         if top_k is not None:
+             logits = top_k_logits(logits, top_k)
+         probs = F.softmax(logits, dim=-1)
+         if sample:
+             ix = torch.multinomial(probs, num_samples=1)
+         else:
+             _, ix = torch.topk(probs, k=1, dim=-1)
+         x = torch.cat((x, ix), dim=1)
+     return x
+
+
+ # NOTE: this second definition shadows the top-k sample() above; the scratch code
+ # below uses this boundary-aware variant.
+ @torch.no_grad()
+ def sample(model, x, steps, temperature=1.0, boundary=None):
+     block_size = model.get_block_size()
+     model.eval()
+     for k in range(steps):
+         x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+         logits, _ = model(x_cond, boundary=boundary)
+         logits = logits[:, -1, :] / temperature
+         probs = F.softmax(logits, dim=-1)
+         ix = torch.multinomial(probs, num_samples=1)
+         x = torch.cat((x, ix), dim=1)
+     return x
+
+
+ # Example scaffold/decoration string:
+ # 'L_5*C(=O)NCc1cccc(OC)c1.*c1nsc2ccccc12COc1cccc(CNC(=O)c2cccc(NC(=O)c3nsc4ccccc34)c2)c1'
+
+ # Scratch sampling helper; relies on module-level vocab, inv, model and test_valid.
+ # for i in range(1, 21):
+ def sample_L(i, option='string'):
+     prefix = 'L_' + str(i)
+     string_input = prefix + '*O=C1NN=Cc2c1cccc2.*O=C(C1CC1)N1CCNCC1'
+     array_input = [vocab[a] for a in ['<bos>'] + list(string_input)]
+     boundary = [len(array_input)]
+     tensor_input = torch.tensor(array_input, device='cuda').unsqueeze(0).repeat(32, 1)
+     boundary = boundary * 32
+     tensor_output = sample(model, tensor_input, 250, boundary=boundary)
+     strings_output = []
+     for j in range(tensor_output.shape[0]):
+         list_string_output = [inv[a] for a in tensor_output[j, boundary[j]:].cpu().numpy() if a != vocab['<pad>']]
+         # if list_string_output[0] == '<bos>':
+         #     list_string_output = list_string_output[1:]
+         if list_string_output[-1] == '<eos>':
+             list_string_output = list_string_output[:-1]
+         string_output = ''.join(list_string_output)
+         strings_output.append(string_output)
+         print(string_output)
+     for j in range(tensor_output.shape[0]):
+         if test_valid(strings_output[j]):
+             print(1)
+         else:
+             print(0)
+
+ # logits, _ = model(tensor_input, boundary=boundary)
+
+ # Example tokenization of the string above:
+ # ['<bos>', 'L', '_', '5', '*', 'C', '(', '=', 'O', ')', 'N', 'C', 'c', '1', 'c', 'c', 'c', 'c', '(', 'O', 'C', ')', 'c', '1', '.', '*', 'c', '1', 'n', 's', 'c', '2', 'c', 'c', 'c', 'c', 'c', '1', '2', 'C', 'O', 'c', '1', 'c', 'c', 'c', 'c', '(', 'C', 'N', 'C', '(', '=', 'O', ')', 'c', '2', 'c', 'c', 'c', 'c', '(', 'N', 'C', '(', '=', 'O', ')', 'c', '3', 'n', 's', 'c', '4', 'c', 'c', 'c', 'c', 'c', '3', '4', ')', 'c', '2', ')', 'c', '1', '<eos>']
SCMG/models/Transformer/__init__.py ADDED
@@ -0,0 +1 @@
+ from .model import *
SCMG/models/Transformer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (202 Bytes). View file
 
SCMG/models/Transformer/__pycache__/model copy 2.cpython-310.pyc ADDED
Binary file (8.45 kB). View file