Spaces:

HITLDD
/

Joey

Sleeping

App Files Files Community

Joey Callanan committed on Dec 10, 2025

Commit

e2b7617

1 Parent(s): 44c0eb3

adding SCMG

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

SCMG/__pycache__/_version.cpython-310.pyc +0 -0
SCMG/_version.py +2 -0
SCMG/config/__init__.py +0 -0
SCMG/config/__pycache__/__init__.cpython-310.pyc +0 -0
SCMG/config/__pycache__/modelparameters.cpython-310.pyc +0 -0
SCMG/config/__pycache__/varables.cpython-310.pyc +0 -0
SCMG/config/modelparameters.py +21 -0
SCMG/config/varables.py +234 -0
SCMG/models/GPT/__init__.py +0 -0
SCMG/models/GPT/__pycache__/__init__.cpython-310.pyc +0 -0
SCMG/models/GPT/__pycache__/model.cpython-310.pyc +0 -0
SCMG/models/GPT/__pycache__/sampler.cpython-310.pyc +0 -0
SCMG/models/GPT/model.py +197 -0
SCMG/models/GPT/sampler.py +85 -0
SCMG/models/GPT2/__init__.py +0 -0
SCMG/models/GPT2/__pycache__/__init__.cpython-310.pyc +0 -0
SCMG/models/GPT2/__pycache__/model.cpython-310.pyc +0 -0
SCMG/models/GPT2/__pycache__/sampler.cpython-310.pyc +0 -0
SCMG/models/GPT2/model.py +197 -0
SCMG/models/GPT2/sampler.py +85 -0
SCMG/models/LSTM/__init__.py +0 -0
SCMG/models/LSTM/__pycache__/__init__.cpython-310.pyc +0 -0
SCMG/models/LSTM/__pycache__/model.cpython-310.pyc +0 -0
SCMG/models/LSTM/__pycache__/sampler.cpython-310.pyc +0 -0
SCMG/models/LSTM/__pycache__/trainer.cpython-310.pyc +0 -0
SCMG/models/LSTM/model.py +48 -0
SCMG/models/LSTM/sampler.py +20 -0
SCMG/models/LSTM/trainer.py +195 -0
SCMG/models/Reinvent/__init__.py +0 -0
SCMG/models/Reinvent/__pycache__/__init__.cpython-310.pyc +0 -0
SCMG/models/Reinvent/__pycache__/model copy 2.cpython-310.pyc +0 -0
SCMG/models/Reinvent/__pycache__/model copy.cpython-310.pyc +0 -0
SCMG/models/Reinvent/__pycache__/model.cpython-310.pyc +0 -0
SCMG/models/Reinvent/__pycache__/sampler.cpython-310.pyc +0 -0
SCMG/models/Reinvent/model copy 2.py +420 -0
SCMG/models/Reinvent/model copy.py +187 -0
SCMG/models/Reinvent/model.py +278 -0
SCMG/models/Reinvent/sampler.py +85 -0
SCMG/models/Reinvent_Scaffold_Decorator/__init__.py +0 -0
SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/__init__.cpython-310.pyc +0 -0
SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/model copy 2.cpython-310.pyc +0 -0
SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/model copy.cpython-310.pyc +0 -0
SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/sampler.cpython-310.pyc +0 -0
SCMG/models/Reinvent_Scaffold_Decorator/model copy 2.py +420 -0
SCMG/models/Reinvent_Scaffold_Decorator/model copy.py +187 -0
SCMG/models/Reinvent_Scaffold_Decorator/model.py +276 -0
SCMG/models/Reinvent_Scaffold_Decorator/sampler.py +85 -0
SCMG/models/Transformer/__init__.py +1 -0
SCMG/models/Transformer/__pycache__/__init__.cpython-310.pyc +0 -0
SCMG/models/Transformer/__pycache__/model copy 2.cpython-310.pyc +0 -0

SCMG/__pycache__/_version.cpython-310.pyc ADDED Viewed

Binary file (280 Bytes). View file

SCMG/_version.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ def get_versions():
2	+ version = "0.1.1"

SCMG/config/__init__.py ADDED Viewed

File without changes

SCMG/config/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (168 Bytes). View file

SCMG/config/__pycache__/modelparameters.cpython-310.pyc ADDED Viewed

Binary file (430 Bytes). View file

SCMG/config/__pycache__/varables.cpython-310.pyc ADDED Viewed

Binary file (6.19 kB). View file

SCMG/config/modelparameters.py ADDED Viewed

	@@ -0,0 +1,21 @@

+# Configuration key names for model hyper-parameters.
+# Each constant holds the string used as the key.  The same names with the
+# same values are also defined in SCMG/config/varables.py, which is the module
+# the model code visible in this commit actually reads them from.
+# The commented-out class below is an earlier form of the same constants.
+# class ModelParameters():
+#     def __init__(self):
+#         self.NUM_LAYERS = "num_layers"
+#         self.NUM_HEADS = "num_heads"
+#         self.DIM_ATTENTION = "dim_attention"
+#         self.DIM_FEEDFORWARD = "dim_feedforward"
+#         self.DIM_LSTM = "dim_lstm"
+#         self.DIM_EMBEDDING = "dim_embedding"
+#         self.DIM_OUTPUT = "dim_output"
+#         self.RATE_DROPOUT = "rate_dropout"
+#         return
+#
+NUM_LAYERS = "num_layers"
+NUM_HEADS = "num_heads"
+DIM_ATTENTION = "dim_attention"
+DIM_FEEDFORWARD = "dim_feedforward"
+DIM_LSTM = "dim_lstm"
+DIM_EMBEDDING = "dim_embedding"
+DIM_OUTPUT = "dim_output"
+RATE_DROPOUT = "rate_dropout"

SCMG/config/varables.py ADDED Viewed

	@@ -0,0 +1,234 @@

+import re
+from rdkit import Chem
+DEFAULT = "default"
+AUTO = "auto"
+# Variables
+COLUMN_SMILES = "SMILES"
+COLUMN_ENCODER = "Encoder"
+COLUMN_DECODER = "Decoder"
+COLUMN_TASK_TYPE = "TaskType"
+COLUMN_ENCODER_SEQUENCE = "EncoderSequence"
+COLUMN_DECODER_SEQUENCE = "DecoderSequence"
+COLUMN_BOS_TOKEN = "TokenBOS"
+COLUMN_CUTS = "Cuts"
+COLUMN_MIN_TOP_P = "MinTopP"
+COLUMN_MIN_TOKEN_PROB = "MinTokenProb"
+COLUMN_TOKEN_EOS_PROB = "TokenEOSProb"
+COLUMN_MOLNAME = "MolName"
+COLUMN_MOLINDEX = "MolIndex"
+COLUMN_MOL_PROB = "MolProb"
+COLUMN_MOL_PROB_TOPP = "MolProb_TopP"
+# Task
+TOKEN_BEGIN = "<bos>"
+TOKEN_END = "<eos>"
+TOKEN_SEP = "<sep>"
+TOKEN_CODER_SEP = "<delim>"
+# TRAIN = "Train"
+TOKEN_PAD = "<pad>"
+# NOTE(review): the three names below all map to the same column label
+# "ExcludedSize".  Confirm the aliasing is intended rather than a copy/paste
+# slip before treating them as distinct columns.
+COLUMN_EXCLUDED_MIN = "ExcludedSize"
+COLUMN_SIZE_ToRunForNExt = "ExcludedSize"
+COLUMN_SIZE_EXCLUDED = "ExcludedSize"
+# char_level_molecule_generation
+COLUMN_task_char_mg = "char_mg"
+TOKEN_TASK_CHAR_MG = "<char_mg>"
+# char_level_scaffold_constrained_molecule_generation
+COLUMN_task_char_scmg = "char_scmg"
+TOKEN_TASK_SCMG_CHAR_RAND = "<scmg_char_rand>"
+TOKEN_TASK_SCMG_CHAR_CANO = "<scmg_char_cano>"
+TOKEN_TASK_DG_CHAR_RAND = "<dg_char_rand>"
+TOKEN_TASK_DG_CHAR_CANO = "<dg_char_cano>"
+LIST_HEAVY_ATOMS = ['c', 'C', 'O', 'N', 'n', 'F', '[C@H]', 'Cl', '[C@@H]', 'S', '[nH]', 's', 'o', 'Br', '[C@]', '[C@@]', 'P', 'B', '[N+]', '[P@@]', '[P@]', '[S@@]', '[N@+]', '[S@]', '[N@@+]', '[N-]', 'p']
+COLUMN_EXCLUDE_REASON = "Excluded"
+COLUMN_STATE = "State"
+# chemical_property_prediction
+COLUMN_task_chem_pd = "chem_pd"
+TOKEN_TASK_CHEM_PD = "<chem_pd>"
+# molecule_identification
+COLUMN_task_mol_id = "mol_id"
+TOKEN_TASK_MOL_ID = "<mol_id>"
+FILEPATH_MODEL = "filepath_model"
+FILEPATH_INPUT = "filepath_input"
+DIRPATH_OUTPUT = "dirpath_output"
+RANDOM_AUGUMENT = "random_augument"
+TOP_P = "top_p"
+TOP_K = "top_k"
+MIN_MOL_PROB = "minimum_mol_prob"
+MIN_TOKEN_PROB = "minimum_token_prob"
+MAX_HEAVY_ATOMS = "maximum_heavy_atoms"
+TEMPERATURE = "temperature"
+# Data
+VOCAB = "vocab"
+SIZE_VOCAB = "size_vocab"
+FILENAME_VOCAB = "vocab.pt"
+FILENAME_VOCABSTATE = "vocabstate.pt"
+FILENAME_DATA_RAW = "data.csv"
+TRAIN = "train"
+TEST = "test"
+FILENAME_TRAIN_RAW = "train.pt"
+FILENAME_TRAIN_EPOCH = lambda x: "train_"+str(x)+".pt"
+FILENAME_TEST = "test.pt"
+FILENAME_TEST_RAW = "test.pt"
+FILENAME_TEST_EPOCH = lambda x: "test_"+str(x)+".pt"
+FILEPATH_VOCAB = "filepath_vocab"
+#
+# try:
+#     config.screen_width = os.get_terminal_size()[0]
+# except:
+#     config.screen_width = 141
+MAX_SEQUENCE_LENGTH = "max_sequence_length"
+COLUMN_INCHIKEY = "InchiKey"
+# Train
+MODEL_NAME = "model_name"
+MODEL_TYPE = "model_type"
+MODEL = "model"
+TASKS = "tasks"
+DIRPATH_CHECKPOINT = "dirpath_checkpoint"
+DIRPATH_DATA = "dirpath_data"
+SIZE_BATCH = "size_batch"
+SIZE_BLOCK = "size_block"
+RATE_LEARNING = "rate_learning"
+DEVICE = "device"
+EPOCH           = "epoch"
+EPOCHS           = "epochs"
+NUM_WORKERS = "num_workers"
+DIRPATH_COMPLETED = "dirpath_completed"
+DIRPATH_EXCLUDED = "dirpath_excluded"
+DIRPATH_SBATCH = "dirpath_sbatch"
+# Stats
+TRAIN_LOSS      = "train_loss"
+TEST_LOSS       = "test_loss"
+TIME_ELAPSED    = "time_elapsed"
+# NOTE(review): RATE_LEARNING is already assigned the same value in the
+# "# Train" section above; this second assignment is redundant.
+RATE_LEARNING   = "rate_learning"
+TOKENS          = "tokens"
+# Model
+FILENAME_MODEL_INIT = "model_init.pt"
+FILENAME_MODEL_LATEST = "model.pt"
+FILENAME_MODEL_TRAINED = lambda x: "model_"+str(x)+".pt"
+FILENAME_MODELSTATE_INIT = "modelstate_init.pt"
+FILENAME_MODELSTATE_LATEST = "modelstate.pt"
+FILENAME_MODELSTATE_TRAINED = lambda x: "modelstate_"+str(x)+".pt"
+FILENAME_SCHEDULER_INIT = "scheduler_init.pt"
+FILENAME_SCHEDULER_LATEST = "scheduler.pt"
+FILENAME_SCHEDULER_TRAINED = lambda x: "scheduler_"+str(x)+".pt"
+FILENAME_OPTIMIZER_INIT = "optimizer_init.pt"
+FILENAME_OPTIMIZER_LATEST = "optimizer.pt"
+FILENAME_OPTIMIZER_TRAINED = lambda x: "optimizer_"+str(x)+".pt"
+# FILENAME_TRAINLOG_INIT = "train_init.pt"
+FILENAME_TRAINSTATS_LATEST = "trainstats_latest.csv"
+FILENAME_TRAINSTATS_TRAINED = lambda x: "trainstats_"+str(x)+".csv"
+FILENAME_TRAINLOG = "train"
+FORMAT_TIMESTAMP_FILEHANDLER = "%Y%m%d%H%M%S_%f.log"
+FORMAT_TIMESTAMP = "%Y/%m/%d %H:%M:%S %f"
+FORMAT_LOG = ""
+DRY_RUN = "dry_run"
+LOG_LEVEL = "log_level"
+TOKENIZER = "tokenizer"
+RUN_ONE_EPOCH = "run_one_epoch"
+# # Column names
+# IS_NOVEL = "IS_NOVAL"
+# NOVALTY = "Novalty"
+# # VALIDITY = "Validity"
+# IS_VALID = "IS_VALID"
+# IS_NOVAL = "IS_NOVAL"
+# DIR_SAVE = "dir_save"
+# MODEL_LATEST = "model.pt"
+# LOG_TRAIN_LATEST = "train_log.csv"
+# OPTIMIZER_LATEST = "optimizer.pt"
+# SCHEDULER_LATEST = "scheduler.pt"
+# TRAIN_LOSS      = "train_loss"
+# TEST_LOSS       = "test_loss"
+# TIME_ELAPSED    = "time_elapsed"
+# # LR              = "lr"
+# TOKENS          = "tokens"
+LOGP = "logP"
+WEIGHT = "weight"
+QED = "QED"
+VALIDITY = "SMILES_VALID"
+FILENAME_TRAIN_DIST = "train_dist.pt"
+FILENAME_TEST_DIST = "test_dist.pt"
+MODEL_PRETRAIN = "model_pretrained.pt"
+PYFILE_SAMPLER = "sampler.py"
+PYFILE_TRAINER = "trainer.py"
+PYFILE_DATALOADER = "dataloader.py"
+# PYFILE_SAMPLER = "sampler.py"
+# Model parameters
+NUM_LAYERS = "num_layers"
+NUM_ENCODER_LAYERS = "num_encoder_layers"
+NUM_DECODER_LAYERS = "num_decoder_layers"
+NUM_HEADS = "num_heads"
+DIM_ATTENTION = "dim_attention"
+DIM_FEEDFORWARD = "dim_feedforward"
+DIM_LSTM = "dim_lstm"
+DIM_EMBEDDING = "dim_embedding"
+DIM_OUTPUT = "dim_output"
+RATE_DROPOUT = "rate_dropout"
+#Scheduler
+SIZE_STEP = "size_step"
+GAMMA = "gamma"
+# From Reinvent-Scaffold-Decorator
+# Attachment points are written with the "*" token.
+ATTACHMENT_POINT_TOKEN = "*"
+# Matches a numbered attachment point "[*:N]" and captures the digits N.
+ATTACHMENT_POINT_NUM_REGEXP = r"\[{}:(\d+)\]".format(re.escape(ATTACHMENT_POINT_TOKEN))
+# Matches either a bare "*" or a bracketed atom that starts with "[*".
+ATTACHMENT_POINT_REGEXP = r"(?:{0}|\[{0}[^\]]*\])".format(re.escape(ATTACHMENT_POINT_TOKEN))
+# Matches a "*" that is not immediately preceded by "[".
+ATTACHMENT_POINT_NO_BRACKETS_REGEXP = r"(?<!\[){}".format(re.escape(ATTACHMENT_POINT_TOKEN))
+ATTACHMENT_SEPARATOR_TOKEN = "|"
+# SMARTS patterns grouped by slicing scheme name ("hr", "recap").
+# NOTE(review): in the ether pattern the exclusion "$(NC=O)" is written on an
+# O atom and starts with N, so it looks like it can never apply -- confirm
+# against the upstream rule set.
+SLICE_SMARTS = {
+    "hr": [
+        "[*]!@-[*]"
+    ],
+    "recap": [
+        "[C;$(C=O)]!@-N",  # amides and urea
+        "[C;$(C=O)]!@-O",  # esters
+        "C!@-[N;!$(NC=O)]",  # amines
+        "C!@-[O;!$(NC=O)]",  # ether
+        "[CX3]!@=[CX3]",  # olefin
+        "[N+X4]!@-C",  # quaternary nitrogen
+        "n!@-C",  # aromatic N - aliphatic C
+        "[$([NR][CR]=O)]!@-C",  # lactam nitrogen - aliphatic carbon
+        "c!@-c",  # aromatic C - aromatic C
+        "N!@-[$(S(=O)=O)]"  # sulphonamides
+    ]
+}
+# The name is rebound here: each SMARTS string is replaced by the result of
+# Chem.MolFromSmarts.  This runs when the module is imported, so importing
+# this config module requires RDKit.
+SLICE_SMARTS = {name: [Chem.MolFromSmarts(sma) for sma in smarts] for name, smarts in SLICE_SMARTS.items()}

SCMG/models/GPT/__init__.py ADDED Viewed

File without changes

SCMG/models/GPT/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (172 Bytes). View file

SCMG/models/GPT/__pycache__/model.cpython-310.pyc ADDED Viewed

Binary file (7.55 kB). View file

SCMG/models/GPT/__pycache__/sampler.cpython-310.pyc ADDED Viewed

Binary file (3.16 kB). View file

SCMG/models/GPT/model.py ADDED Viewed

	@@ -0,0 +1,197 @@

+import math
+import logging
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+# logger = logging.getLogger(__name__)
+from SCMG.config import varables
+from torch.autograd import Variable
+class PositionalEncoder(nn.Module):
+    """Sinusoidal position table with dropout.
+
+    ``__init__`` precomputes a ``(1, SIZE_BLOCK, DIM_ATTENTION)`` tensor with
+    sine values in the even columns and cosine values in the odd columns, and
+    registers it as the buffer ``pe`` (a buffer, not a trainable parameter).
+    """
+    def __init__(self, config):
+        super(PositionalEncoder, self).__init__()
+        self.Dropout = nn.Dropout(p=config[varables.RATE_DROPOUT])
+        max_len = config[varables.SIZE_BLOCK]
+        pe = torch.zeros(max_len, config[varables.DIM_ATTENTION])
+        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
+        # One frequency per (sin, cos) column pair: 10000^(-2i / DIM_ATTENTION).
+        div_term = torch.exp(torch.arange(0, config[varables.DIM_ATTENTION], 2).float() * (-math.log(10000.0) / config[varables.DIM_ATTENTION]))
+        # NOTE(review): with an odd DIM_ATTENTION there is one fewer odd column
+        # than frequencies, so the cosine assignment would fail on shape.
+        pe[:, 0::2] = torch.sin(position * div_term)
+        pe[:, 1::2] = torch.cos(position * div_term)
+        pe = pe.unsqueeze(0)
+        self.register_buffer('pe', pe)
+    def forward(self, T):
+        """Return the first ``T`` rows of the table, after dropout, as ``(1, T, DIM_ATTENTION)``."""
+        # Slicing does not raise when T exceeds SIZE_BLOCK; it yields at most
+        # SIZE_BLOCK rows.
+        x = self.Dropout(self.pe[:,:T, :])
+        return x
+class Attention(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+        self.Key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Dropout_Attention = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Dropout_Residue = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+        self.NumberOfHeads = config[varables.NUM_HEADS]
+        self.DimHead = config[varables.DIM_ATTENTION] // self.NumberOfHeads
+        self.DimAttention = config[varables.DIM_ATTENTION]
+    def forward(self, X_1,X_2, mask=None):
+        if X_2 is None:
+            X_2 = X_1
+        BatchSize, T_Encoder, _ = X_1.size()
+        BatchSize, T_Decoder, _ = X_2.size()
+        K = self.Key(  X_1).view(BatchSize, T_Encoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        Q = self.Query(X_2).view(BatchSize, T_Decoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        V = self.Value(X_1).view(BatchSize, T_Encoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        # k,q,v dimension: (BatchSize, SequenceSize, NumberOfHeads, HeadDimension) 3,4,5,16
+        ScoreAttention = (Q @ K.transpose(-2, -1)) / math.sqrt(self.DimHead)
+        ScoreAttention = ScoreAttention.masked_fill(mask==0, -1e9)
+        ScoreAttention = F.softmax(ScoreAttention, dim=-1)
+        ScoreAttention = self.Dropout_Attention(ScoreAttention)
+        # k.transpose(-2,-1): 3,4,16,5
+        # (q@(k.transpose(-2,-1))): 3,4,5,5
+        Z = ScoreAttention @ V
+        # y dimension: 3,4,5,16
+        Z = Z.transpose(1, 2).contiguous().view(BatchSize, T_Decoder, self.DimAttention)
+        # y dimension: 3,5,64
+        Z = self.Dropout_Residue(self.Projection(Z))
+        return Z
+class FeedForward(nn.Module):
+    """Position-wise feed-forward network: Linear -> GELU -> Dropout -> Linear.
+
+    Input and output width are ``DIM_EMBEDDING``.  The hidden width is
+    ``DIM_FEEDFORWARD``, or ``4 * DIM_ATTENTION`` when that value is 0.
+    """
+    def __init__(self, config):
+        super().__init__()
+        # A configured value of 0 selects the default hidden width.
+        if config[varables.DIM_FEEDFORWARD] == 0:
+            Dim_FeedForward = config[varables.DIM_ATTENTION] *4
+        else:
+            Dim_FeedForward = config[varables.DIM_FEEDFORWARD]
+        self.Linear1 = nn.Linear(config[varables.DIM_EMBEDDING], Dim_FeedForward)
+        self.GELU = nn.GELU()
+        self.Linear2 = nn.Linear(Dim_FeedForward, config[varables.DIM_EMBEDDING])
+        self.Dropout = nn.Dropout(config[varables.RATE_DROPOUT])
+    def forward(self,x):
+        """Apply the two-layer network to ``x``; dropout sits between the layers only."""
+        x = self.Linear1(x)
+        x = self.GELU   (x)
+        x = self.Dropout(x)
+        x = self.Linear2(x)
+        return x
+class DecoderBlock(nn.Module):
+    """One transformer layer: masked self-attention, then feed-forward.
+
+    Each sub-layer normalises its input with LayerNorm, adds the result to
+    the residual stream, and applies dropout to the sum.
+    """
+    def __init__(self, config):
+        super().__init__()
+        self.LayerNorm1      = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.LayerNorm2      = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.AttentionMasked = Attention(  config)
+        # NOTE(review): AttentionCross is constructed (so its weights are part
+        # of the module) but forward() below never calls it.
+        self.AttentionCross  = Attention(  config)
+        self.FeedForward     = FeedForward(config)
+    def forward(self, X_Decoder,Mask_Decoder):
+        """Run the block on ``X_Decoder`` using ``Mask_Decoder`` for the self-attention."""
+        # Passing None as the second argument makes Attention use X_Decoder for
+        # queries as well as keys/values (self-attention).
+        X_Decoder = self.Dropout1(X_Decoder + self.AttentionMasked(self.LayerNorm1(X_Decoder), None,                       Mask_Decoder))
+        X_Decoder = self.Dropout2(X_Decoder + self.FeedForward    (self.LayerNorm2(X_Decoder)                                          ))
+        return X_Decoder
+class Model(nn.Module):
+    """Decoder-only transformer over the decoder vocabulary.
+
+    Token embeddings (scaled by sqrt(DIM_ATTENTION)) plus sinusoidal
+    positions pass through NUM_LAYERS ``DecoderBlock`` layers, a final
+    LayerNorm and a bias-free linear head that yields per-token logits.
+
+    NOTE(review): the embedding and the head use DIM_ATTENTION while the
+    blocks and the final LayerNorm use DIM_EMBEDDING, so the two config
+    values presumably have to be equal -- confirm against the configs in use.
+    """
+    def __init__(self, config):
+        super().__init__()
+        # Varables
+        self.Dim_Attention = config[varables.DIM_ATTENTION]
+        self.Token_Padding_Decoder = config["Token_Padding_Decoder"]
+        # Embedding and positional encoding layers
+        self.Embedding_Decoder = nn.Embedding(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION])
+        self.pos_emb = PositionalEncoder(config)
+        # Dropout and normalization layers
+        self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        # Transformer layers
+        self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        # Output layer
+        self.head = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]), bias=False)
+        # Init
+        self.apply(self._init_weights)
+        # Nothing in this class assigns the optimizer; see init_scheduler.
+        self.optimizer = None
+        # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+    def _init_weights(self, module):
+        """Xavier-uniform initialise every parameter of ``module`` that has more than one dimension."""
+        # This runs once per submodule via self.apply, and module.parameters()
+        # also yields the parameters of nested submodules, so the same tensor
+        # can be re-initialised more than once.
+        for p in module.parameters():
+            if p.dim() > 1:
+                nn.init.xavier_uniform_(p)
+        # if isinstance(module, (nn.Linear, nn.Embedding)):
+        #     module.weight.data.normal_(mean=0.0, std=0.02)
+        #     if isinstance(module, nn.Linear) and module.bias is not None:
+        #         module.bias.data.zero_()
+        # elif isinstance(module, nn.LayerNorm):
+        #     module.bias.data.zero_()
+        #     module.weight.data.fill_(1.0)
+    def init_optimizers(self,train_config):
+        """Return a new Adam optimizer over all parameters, with learning rate ``train_config[RATE_LEARNING]``.
+
+        The optimizer is returned only; it is not stored on ``self``.
+        """
+        optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+        return optimizer
+    def init_scheduler(self,train_config):
+        """Return a StepLR scheduler (``SIZE_STEP``, ``GAMMA``) wrapping ``self.optimizer``."""
+        # NOTE(review): self.optimizer is None after __init__ and
+        # init_optimizers does not assign it, so the caller has to set
+        # model.optimizer before calling this.
+        scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+        return scheduler
+    def get_collate_fn(self, vocab_encoder,vocab_decoder):
+        """Return a collate function that right-pads a batch to rectangular tensors.
+
+        The returned function takes a list of items where ``item[0]`` is the
+        encoder token list and ``item[1]`` the decoder token list, pads each
+        side with its vocabulary's ``<pad>`` id up to the longest sequence in
+        the batch, and returns ``(x, y, boundary)``; ``boundary`` is always -1.
+        """
+        def collate(results):
+            X_Encoder = [a[0] for a in results]
+            X_Decoder = [a[1] for a in results]
+            boundary = -1
+            max_len_x = max([len(a) for a in X_Encoder])
+            max_len_y = max([len(a) for a in X_Decoder])
+            x = torch.tensor([(a+[vocab_encoder[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in X_Encoder],dtype=torch.long)
+            y = torch.tensor([(a+[vocab_decoder[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in X_Decoder],dtype=torch.long)
+            return x,y,boundary
+        return collate
+    def generate_masks(self, X_Decoder):
+        """Build the decoder attention mask for a ``(batch, T)`` tensor of token ids.
+
+        Returns a boolean ``(batch, 1, T, T)`` mask that is True where the key
+        token is not padding and the key position is not after the query
+        position.
+        """
+        # Only the decoder mask is built here.
+        T = X_Decoder.shape[1]
+        # Padding mask over key positions, repeated for every query position.
+        Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).unsqueeze(-2).repeat(1,1,T,1)
+        # Lower-triangular causal mask; entries above the diagonal are cleared.
+        mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T).to(Mask_Decoder.device)
+        Mask_Decoder = Mask_Decoder.masked_fill(mask_tril==0,0)
+        return Mask_Decoder
+    def forward(self, X_Encoder, X_Decoder, Y_Decoder_Ref=None,boundary=None):
+        """Compute per-token logits for ``X_Decoder`` and, optionally, the loss.
+
+        ``X_Encoder`` and ``boundary`` are accepted but not used in this body.
+
+        Returns:
+            ``(logits, loss)``.  ``loss`` is the cross-entropy against
+            ``Y_Decoder_Ref`` with padding targets ignored, or ``None`` when
+            no reference is given.
+        """
+        Mask_Decoder = self.generate_masks(X_Decoder)
+        # preprocess
+        X_Decoder = self.Dropout1(self.Embedding_Decoder(X_Decoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Decoder.size(1)))
+        # Decoder blocks
+        for decoder_block in self.decoder_blocks:
+            X_Decoder = decoder_block(X_Decoder,Mask_Decoder)
+        X_Decoder = self.LayerNorm1(X_Decoder)
+        Y_Decoder_Logits = self.head(X_Decoder)
+        loss = None
+        if Y_Decoder_Ref is not None:
+            # Flatten to (batch*T, vocab) against (batch*T,) targets.
+            loss = F.cross_entropy(Y_Decoder_Logits.view(-1, Y_Decoder_Logits.size(-1)), Y_Decoder_Ref.view(-1),ignore_index=self.Token_Padding_Decoder)
+        return Y_Decoder_Logits, loss

SCMG/models/GPT/sampler.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import random
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+def set_seed(seed):
+    """Seed the random number generators with ``seed``.
+
+    Calls ``random.seed``, ``np.random.seed``, ``torch.manual_seed`` and
+    ``torch.cuda.manual_seed_all`` in turn.
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+def top_k_logits(logits, k):
+    """Return a copy of ``logits`` with everything below each row's k-th largest value set to ``-inf``.
+
+    The ``v[:, [-1]]`` indexing expects a 2-D ``logits``.  The input is not
+    modified, and values equal to the k-th largest are kept.
+    """
+    # ix (the indices of the top-k entries) is not used.
+    v, ix = torch.topk(logits, k)
+    out = logits.clone()
+    # v[:, [-1]] is each row's k-th largest value, kept as a column.
+    out[out < v[:, [-1]]] = -float('Inf')
+    return out
+@torch.no_grad()
+def sample(model, x, steps, temperature=1.0, sample=False, top_k=None):
+    block_size = model.get_block_size()
+    model.eval()
+    for k in range(steps):
+        x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+        logits, _ = model(x_cond)
+        logits = logits[:, -1, :] / temperature
+        if top_k is not None:
+            logits = top_k_logits(logits, top_k)
+        probs = F.softmax(logits, dim=-1)
+        if sample:
+            ix = torch.multinomial(probs, num_samples=1)
+        else:
+            _, ix = torch.topk(probs, k=1, dim=-1)
+        x = torch.cat((x, ix), dim=1)
+    return x
+@torch.no_grad()
+def sample(model, x, steps, temperature=1.0,boundary=None):
+    block_size = model.get_block_size()
+    model.eval()
+    for k in range(steps):
+        x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+        logits, _ = model(x_cond,boundary=boundary)
+        logits = logits[:, -1, :] / temperature
+        probs = F.softmax(logits, dim=-1)
+        ix = torch.multinomial(probs, num_samples=1)
+        x = torch.cat((x, ix), dim=1)
+    return x
+# The bare string below is an expression statement with no effect; it looks
+# like an example of a prompt followed by a generated molecule.
+'L_5*C(=O)NCc1cccc(OC)c1.*c1nsc2ccccc12COc1cccc(CNC(=O)c2cccc(NC(=O)c3nsc4ccccc34)c2)c1'
+# for i in range(1,21):
+def sample_L(i,option='string'):
+    """Sample 32 completions for a fixed two-fragment prompt prefixed with ``L_<i>`` and print them.
+
+    Prints each decoded string, then prints 1 or 0 per string depending on
+    ``test_valid``.  Returns ``None``.  ``option`` is not used.
+
+    NOTE(review): ``vocab``, ``inv``, ``model`` and ``test_valid`` are not
+    defined in the code shown here; the function only works if they exist as
+    module-level names at call time.  The device ("cuda"), batch size (32)
+    and step count (250) are hard-coded.
+    """
+    # i=2
+    prefix = 'L_'+str(i)
+    string_input = prefix + '*O=C1NN=Cc2c1cccc2.*O=C(C1CC1)N1CCNCC1'
+    # The prompt is tokenised one character at a time.
+    array_input = [vocab[a] for a in ['<bos>'] + list(string_input)]
+    boundary = [len(array_input)]
+    tensor_input = torch.tensor(array_input,device='cuda').unsqueeze(0).repeat(32,1)
+    boundary = boundary*32
+    tensor_output = sample(model,tensor_input,250,boundary=boundary)
+    strings_output = []
+    for j in range(tensor_output.shape[0]):
+        # Decode only the tokens generated after the prompt, dropping padding.
+        list_string_output = [inv[a] for a in tensor_output[j,boundary[j]:].cpu().numpy() if a != vocab['<pad>']]
+        # if list_string_output[0] == '<bos>':
+        #     list_string_output = list_string_output[1:]
+        # NOTE(review): only a trailing '<eos>' is removed; an '<eos>' that
+        # appears earlier is kept along with the tokens after it.  An empty
+        # list would raise IndexError here.
+        if list_string_output[-1] == '<eos>':
+            list_string_output = list_string_output[:-1]
+        string_output = ''.join(list_string_output)
+        strings_output.append(string_output)
+        print(string_output)
+    for j in range(tensor_output.shape[0]):
+        if test_valid(strings_output[j]):
+            print(1)
+        else:
+            print(0)
+    # logits,_ = model(tensor_input,boundary=boundary)
+['<bos>', 'L', '_', '5', '*', 'C', '(', '=', 'O', ')', 'N', 'C', 'c', '1', 'c', 'c', 'c', 'c', '(', 'O', 'C', ')', 'c', '1', '.', '*', 'c', '1', 'n', 's', 'c', '2', 'c', 'c', 'c', 'c', 'c', '1', '2', 'C', 'O', 'c', '1', 'c', 'c', 'c', 'c', '(', 'C', 'N', 'C', '(', '=', 'O', ')', 'c', '2', 'c', 'c', 'c', 'c', '(', 'N', 'C', '(', '=', 'O', ')', 'c', '3', 'n', 's', 'c', '4', 'c', 'c', 'c', 'c', 'c', '3', '4', ')', 'c', '2', ')', 'c', '1', '<eos>']

SCMG/models/GPT2/__init__.py ADDED Viewed

File without changes

SCMG/models/GPT2/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (173 Bytes). View file

SCMG/models/GPT2/__pycache__/model.cpython-310.pyc ADDED Viewed

Binary file (7.56 kB). View file

SCMG/models/GPT2/__pycache__/sampler.cpython-310.pyc ADDED Viewed

Binary file (3.17 kB). View file

SCMG/models/GPT2/model.py ADDED Viewed

	@@ -0,0 +1,197 @@

+import math
+import logging
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+# logger = logging.getLogger(__name__)
+from SCMG.config import varables
+from torch.autograd import Variable
+class PositionalEncoder(nn.Module):
+    """Sinusoidal position table with dropout.
+
+    ``__init__`` precomputes a ``(1, SIZE_BLOCK, DIM_ATTENTION)`` tensor with
+    sine values in the even columns and cosine values in the odd columns, and
+    registers it as the buffer ``pe`` (a buffer, not a trainable parameter).
+    """
+    def __init__(self, config):
+        super(PositionalEncoder, self).__init__()
+        self.Dropout = nn.Dropout(p=config[varables.RATE_DROPOUT])
+        max_len = config[varables.SIZE_BLOCK]
+        pe = torch.zeros(max_len, config[varables.DIM_ATTENTION])
+        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
+        # One frequency per (sin, cos) column pair: 10000^(-2i / DIM_ATTENTION).
+        div_term = torch.exp(torch.arange(0, config[varables.DIM_ATTENTION], 2).float() * (-math.log(10000.0) / config[varables.DIM_ATTENTION]))
+        # NOTE(review): with an odd DIM_ATTENTION there is one fewer odd column
+        # than frequencies, so the cosine assignment would fail on shape.
+        pe[:, 0::2] = torch.sin(position * div_term)
+        pe[:, 1::2] = torch.cos(position * div_term)
+        pe = pe.unsqueeze(0)
+        self.register_buffer('pe', pe)
+    def forward(self, T):
+        """Return the first ``T`` rows of the table, after dropout, as ``(1, T, DIM_ATTENTION)``."""
+        # Slicing does not raise when T exceeds SIZE_BLOCK; it yields at most
+        # SIZE_BLOCK rows.
+        x = self.Dropout(self.pe[:,:T, :])
+        return x
+class Attention(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+        self.Key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Dropout_Attention = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Dropout_Residue = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+        self.NumberOfHeads = config[varables.NUM_HEADS]
+        self.DimHead = config[varables.DIM_ATTENTION] // self.NumberOfHeads
+        self.DimAttention = config[varables.DIM_ATTENTION]
+    def forward(self, X_1,X_2, mask=None):
+        if X_2 is None:
+            X_2 = X_1
+        BatchSize, T_Encoder, _ = X_1.size()
+        BatchSize, T_Decoder, _ = X_2.size()
+        K = self.Key(  X_1).view(BatchSize, T_Encoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        Q = self.Query(X_2).view(BatchSize, T_Decoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        V = self.Value(X_1).view(BatchSize, T_Encoder, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        # k,q,v dimension: (BatchSize, SequenceSize, NumberOfHeads, HeadDimension) 3,4,5,16
+        ScoreAttention = (Q @ K.transpose(-2, -1)) / math.sqrt(self.DimHead)
+        ScoreAttention = ScoreAttention.masked_fill(mask==0, -1e9)
+        ScoreAttention = F.softmax(ScoreAttention, dim=-1)
+        ScoreAttention = self.Dropout_Attention(ScoreAttention)
+        # k.transpose(-2,-1): 3,4,16,5
+        # (q@(k.transpose(-2,-1))): 3,4,5,5
+        Z = ScoreAttention @ V
+        # y dimension: 3,4,5,16
+        Z = Z.transpose(1, 2).contiguous().view(BatchSize, T_Decoder, self.DimAttention)
+        # y dimension: 3,5,64
+        Z = self.Dropout_Residue(self.Projection(Z))
+        return Z
+class FeedForward(nn.Module):
+    """Position-wise feed-forward network: Linear -> GELU -> Dropout -> Linear.
+
+    Input and output width are ``DIM_EMBEDDING``.  The hidden width is
+    ``DIM_FEEDFORWARD``, or ``4 * DIM_ATTENTION`` when that value is 0.
+    """
+    def __init__(self, config):
+        super().__init__()
+        # A configured value of 0 selects the default hidden width.
+        if config[varables.DIM_FEEDFORWARD] == 0:
+            Dim_FeedForward = config[varables.DIM_ATTENTION] *4
+        else:
+            Dim_FeedForward = config[varables.DIM_FEEDFORWARD]
+        self.Linear1 = nn.Linear(config[varables.DIM_EMBEDDING], Dim_FeedForward)
+        self.GELU = nn.GELU()
+        self.Linear2 = nn.Linear(Dim_FeedForward, config[varables.DIM_EMBEDDING])
+        self.Dropout = nn.Dropout(config[varables.RATE_DROPOUT])
+    def forward(self,x):
+        """Apply the two-layer network to ``x``; dropout sits between the layers only."""
+        x = self.Linear1(x)
+        x = self.GELU   (x)
+        x = self.Dropout(x)
+        x = self.Linear2(x)
+        return x
+class DecoderBlock(nn.Module):
+    """One transformer layer: masked self-attention, then feed-forward.
+
+    Each sub-layer normalises its input with LayerNorm, adds the result to
+    the residual stream, and applies dropout to the sum.
+    """
+    def __init__(self, config):
+        super().__init__()
+        self.LayerNorm1      = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.LayerNorm2      = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.AttentionMasked = Attention(  config)
+        # NOTE(review): AttentionCross is constructed (so its weights are part
+        # of the module) but forward() below never calls it.
+        self.AttentionCross  = Attention(  config)
+        self.FeedForward     = FeedForward(config)
+    def forward(self, X_Decoder,Mask_Decoder):
+        """Run the block on ``X_Decoder`` using ``Mask_Decoder`` for the self-attention."""
+        # Passing None as the second argument makes Attention use X_Decoder for
+        # queries as well as keys/values (self-attention).
+        X_Decoder = self.Dropout1(X_Decoder + self.AttentionMasked(self.LayerNorm1(X_Decoder), None,                       Mask_Decoder))
+        X_Decoder = self.Dropout2(X_Decoder + self.FeedForward    (self.LayerNorm2(X_Decoder)                                          ))
+        return X_Decoder
+class Model(nn.Module):
+    """Decoder-only transformer over the decoder vocabulary.
+
+    Token embeddings (scaled by sqrt(DIM_ATTENTION)) plus sinusoidal
+    positions pass through NUM_DECODER_LAYERS ``DecoderBlock`` layers, a
+    final LayerNorm and a bias-free linear head that yields per-token logits.
+
+    NOTE(review): the embedding and the head use DIM_ATTENTION while the
+    blocks and the final LayerNorm use DIM_EMBEDDING, so the two config
+    values presumably have to be equal -- confirm against the configs in use.
+    """
+    def __init__(self, config):
+        super().__init__()
+        # Varables
+        self.Dim_Attention = config[varables.DIM_ATTENTION]
+        self.Token_Padding_Decoder = config["Token_Padding_Decoder"]
+        # Embedding and positional encoding layers
+        self.Embedding_Decoder = nn.Embedding(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION])
+        self.pos_emb = PositionalEncoder(config)
+        # Dropout and normalization layers
+        self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        # Transformer layers
+        self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_DECODER_LAYERS])])
+        # Output layer
+        self.head = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]), bias=False)
+        # Init
+        self.apply(self._init_weights)
+        # Nothing in this class assigns the optimizer; see init_scheduler.
+        self.optimizer = None
+        # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+    def _init_weights(self, module):
+        """Xavier-uniform initialise every parameter of ``module`` that has more than one dimension."""
+        # This runs once per submodule via self.apply, and module.parameters()
+        # also yields the parameters of nested submodules, so the same tensor
+        # can be re-initialised more than once.
+        for p in module.parameters():
+            if p.dim() > 1:
+                nn.init.xavier_uniform_(p)
+        # if isinstance(module, (nn.Linear, nn.Embedding)):
+        #     module.weight.data.normal_(mean=0.0, std=0.02)
+        #     if isinstance(module, nn.Linear) and module.bias is not None:
+        #         module.bias.data.zero_()
+        # elif isinstance(module, nn.LayerNorm):
+        #     module.bias.data.zero_()
+        #     module.weight.data.fill_(1.0)
+    def init_optimizers(self,train_config):
+        """Return a new Adam optimizer over all parameters, with learning rate ``train_config[RATE_LEARNING]``.
+
+        The optimizer is returned only; it is not stored on ``self``.
+        """
+        optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+        return optimizer
+    def init_scheduler(self,train_config):
+        """Return a StepLR scheduler (``SIZE_STEP``, ``GAMMA``) wrapping ``self.optimizer``."""
+        # NOTE(review): self.optimizer is None after __init__ and
+        # init_optimizers does not assign it, so the caller has to set
+        # model.optimizer before calling this.
+        scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+        return scheduler
+    def get_collate_fn(self, vocab_encoder,vocab_decoder):
+        """Return a collate function that right-pads a batch to rectangular tensors.
+
+        The returned function takes a list of items where ``item[0]`` is the
+        encoder token list and ``item[1]`` the decoder token list, pads each
+        side with its vocabulary's ``<pad>`` id up to the longest sequence in
+        the batch, and returns ``(x, y, boundary)``; ``boundary`` is always -1.
+        """
+        def collate(results):
+            X_Encoder = [a[0] for a in results]
+            X_Decoder = [a[1] for a in results]
+            boundary = -1
+            max_len_x = max([len(a) for a in X_Encoder])
+            max_len_y = max([len(a) for a in X_Decoder])
+            x = torch.tensor([(a+[vocab_encoder[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in X_Encoder],dtype=torch.long)
+            y = torch.tensor([(a+[vocab_decoder[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in X_Decoder],dtype=torch.long)
+            return x,y,boundary
+        return collate
+    def generate_masks(self, X_Decoder):
+        """Build the decoder attention mask for a ``(batch, T)`` tensor of token ids.
+
+        Returns a boolean ``(batch, 1, T, T)`` mask that is True where the key
+        token is not padding and the key position is not after the query
+        position.
+        """
+        # Only the decoder mask is built here.
+        T = X_Decoder.shape[1]
+        # Padding mask over key positions, repeated for every query position.
+        Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).unsqueeze(-2).repeat(1,1,T,1)
+        # Lower-triangular causal mask; entries above the diagonal are cleared.
+        mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T).to(Mask_Decoder.device)
+        Mask_Decoder = Mask_Decoder.masked_fill(mask_tril==0,0)
+        return Mask_Decoder
+    def forward(self, X_Encoder, X_Decoder, Y_Decoder_Ref=None,boundary=None):
+        """Compute per-token logits for ``X_Decoder`` and, optionally, the loss.
+
+        ``X_Encoder`` and ``boundary`` are accepted but not used in this body.
+
+        Returns:
+            ``(logits, loss)``.  ``loss`` is the cross-entropy against
+            ``Y_Decoder_Ref`` with padding targets ignored, or ``None`` when
+            no reference is given.
+        """
+        Mask_Decoder = self.generate_masks(X_Decoder)
+        # preprocess
+        X_Decoder = self.Dropout1(self.Embedding_Decoder(X_Decoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Decoder.size(1)))
+        # Decoder blocks
+        for decoder_block in self.decoder_blocks:
+            X_Decoder = decoder_block(X_Decoder,Mask_Decoder)
+        X_Decoder = self.LayerNorm1(X_Decoder)
+        Y_Decoder_Logits = self.head(X_Decoder)
+        loss = None
+        if Y_Decoder_Ref is not None:
+            # Flatten to (batch*T, vocab) against (batch*T,) targets.
+            loss = F.cross_entropy(Y_Decoder_Logits.view(-1, Y_Decoder_Logits.size(-1)), Y_Decoder_Ref.view(-1),ignore_index=self.Token_Padding_Decoder)
+        return Y_Decoder_Logits, loss

SCMG/models/GPT2/sampler.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import random
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+def set_seed(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+def top_k_logits(logits, k):
+    v, ix = torch.topk(logits, k)
+    out = logits.clone()
+    out[out < v[:, [-1]]] = -float('Inf')
+    return out
+@torch.no_grad()
+def sample(model, x, steps, temperature=1.0, sample=False, top_k=None):
+    block_size = model.get_block_size()
+    model.eval()
+    for k in range(steps):
+        x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+        logits, _ = model(x_cond)
+        logits = logits[:, -1, :] / temperature
+        if top_k is not None:
+            logits = top_k_logits(logits, top_k)
+        probs = F.softmax(logits, dim=-1)
+        if sample:
+            ix = torch.multinomial(probs, num_samples=1)
+        else:
+            _, ix = torch.topk(probs, k=1, dim=-1)
+        x = torch.cat((x, ix), dim=1)
+    return x
+@torch.no_grad()
+def sample(model, x, steps, temperature=1.0,boundary=None):
+    block_size = model.get_block_size()
+    model.eval()
+    for k in range(steps):
+        x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+        logits, _ = model(x_cond,boundary=boundary)
+        logits = logits[:, -1, :] / temperature
+        probs = F.softmax(logits, dim=-1)
+        ix = torch.multinomial(probs, num_samples=1)
+        x = torch.cat((x, ix), dim=1)
+    return x
+'L_5*C(=O)NCc1cccc(OC)c1.*c1nsc2ccccc12COc1cccc(CNC(=O)c2cccc(NC(=O)c3nsc4ccccc34)c2)c1'
+# for i in range(1,21):
+def sample_L(i,option='string'):
+    # i=2
+    prefix = 'L_'+str(i)
+    string_input = prefix + '*O=C1NN=Cc2c1cccc2.*O=C(C1CC1)N1CCNCC1'
+    array_input = [vocab[a] for a in ['<bos>'] + list(string_input)]
+    boundary = [len(array_input)]
+    tensor_input = torch.tensor(array_input,device='cuda').unsqueeze(0).repeat(32,1)
+    boundary = boundary*32
+    tensor_output = sample(model,tensor_input,250,boundary=boundary)
+    strings_output = []
+    for j in range(tensor_output.shape[0]):
+        list_string_output = [inv[a] for a in tensor_output[j,boundary[j]:].cpu().numpy() if a != vocab['<pad>']]
+        # if list_string_output[0] == '<bos>':
+        #     list_string_output = list_string_output[1:]
+        if list_string_output[-1] == '<eos>':
+            list_string_output = list_string_output[:-1]
+        string_output = ''.join(list_string_output)
+        strings_output.append(string_output)
+        print(string_output)
+    for j in range(tensor_output.shape[0]):
+        if test_valid(strings_output[j]):
+            print(1)
+        else:
+            print(0)
+    # logits,_ = model(tensor_input,boundary=boundary)
+['<bos>', 'L', '_', '5', '*', 'C', '(', '=', 'O', ')', 'N', 'C', 'c', '1', 'c', 'c', 'c', 'c', '(', 'O', 'C', ')', 'c', '1', '.', '*', 'c', '1', 'n', 's', 'c', '2', 'c', 'c', 'c', 'c', 'c', '1', '2', 'C', 'O', 'c', '1', 'c', 'c', 'c', 'c', '(', 'C', 'N', 'C', '(', '=', 'O', ')', 'c', '2', 'c', 'c', 'c', 'c', '(', 'N', 'C', '(', '=', 'O', ')', 'c', '3', 'n', 's', 'c', '4', 'c', 'c', 'c', 'c', 'c', '3', '4', ')', 'c', '2', ')', 'c', '1', '<eos>']

SCMG/models/LSTM/__init__.py ADDED Viewed

File without changes

SCMG/models/LSTM/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (173 Bytes). View file

SCMG/models/LSTM/__pycache__/model.cpython-310.pyc ADDED Viewed

Binary file (2.76 kB). View file

SCMG/models/LSTM/__pycache__/sampler.cpython-310.pyc ADDED Viewed

Binary file (1 kB). View file

SCMG/models/LSTM/__pycache__/trainer.cpython-310.pyc ADDED Viewed

Binary file (5.35 kB). View file

SCMG/models/LSTM/model.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.nn.utils.rnn as rnn_utils
+from SCMG.config import varables
+class Model(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.vocab = config["vocab_encoder"]
+        # self.vocabulary = vocabulary
+        # self.hidden_size = config.hidden
+        # self.num_layers = config.num_layers
+        # self.dropout = config.dropout
+        # self.vocab_size = self.input_size = self.output_size = len(vocabulary)
+        self.embedding_layer = nn.Embedding(len(config["vocab_encoder"]), config[varables.DIM_EMBEDDING])
+        self.lstm_layer = nn.LSTM(config[varables.DIM_EMBEDDING], config[varables.DIM_LSTM],
+                                  config[varables.NUM_LAYERS], dropout=config[varables.RATE_DROPOUT],
+                                  batch_first=True)
+        self.linear_layer = nn.Linear(config[varables.DIM_LSTM], len(config["vocab_encoder"]))
+    def get_collate_fn(self, vocab_encoder,vocab_decoder):
+        def collate(results):
+            x_in = None
+            y_in = [a[0] + [vocab_encoder[varables.TOKEN_SEP]] + a[1] for a in results]
+            # boundary = [a[2] for a in results]
+            max_len = max([len(a) for a in y_in])
+            y = torch.tensor([(a+[vocab_encoder[varables.TOKEN_PAD]]*(max_len-len(a))) for a in y_in],dtype=torch.long)
+            return x_in,y,0
+        return collate
+    def init_optimizers(self,train_config):
+        optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+        return optimizer
+    def init_scheduler(self,train_config):
+        scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+        return scheduler
+    def forward(self, src, trg, trg_out, boundary=None):
+        # x = ([src , torch.tensor([self.vocab["<sep>"]]*x.size[0]).unsqueeze(1).to(x.device), trg],dim=1)
+        hiddens=None
+        x = self.embedding_layer(trg)
+        # x = rnn_utils.pack_padded_sequence(x, lengths, batch_first=True)
+        self.lstm_layer.flatten_parameters()
+        x, hiddens = self.lstm_layer(x, hiddens)
+        # x, _ = rnn_utils.pad_packed_sequence(x, batch_first=True)
+        logits = self.linear_layer(x)
+        loss = None
+        if trg_out is not None:
+            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), trg_out.view(-1))
+        return logits, loss

SCMG/models/LSTM/sampler.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from MoleculeProcessing.utils.utils        import *
+from MoleculeProcessing.utils.utils_sample  import *
+import torch.nn.functional as F
+def sample(model,vocab_bos,size_batch=32,size_block=70,temperature=1.,):
+    model,device = load_to_device(model)
+    model.eval()
+    with torch.no_grad():
+        tensor_sampled = torch.zeros(size_batch,size_block+1,dtype=torch.long,device=device)
+        tensor_sampled[:,0] = vocab_bos
+        hiddens = None
+        for i in range(size_block):
+            input_current = tensor_sampled[:,[i]]
+            probs,hiddens = model.forward(input_current,hiddens)
+            probs = probs[:,-1]
+            probs = probs * temperature
+            probs = F.softmax(probs,dim=-1)
+            sample = torch.distributions.categorical.Categorical(probs).sample()
+            tensor_sampled[:,i+1] = sample
+        return tensor_sampled

SCMG/models/LSTM/trainer.py ADDED Viewed

	@@ -0,0 +1,195 @@

+import math
+import logging
+import time
+from tqdm import tqdm
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.optim.lr_scheduler import LambdaLR
+from torch.utils.data.dataloader import DataLoader
+from MoleculeProcessing.utils.utils_train import *
+logger = logging.getLogger(__name__)
+from MoleculeProcessing.utils.utils        import *
+from MoleculeProcessing.utils.utils_train  import *
+from MoleculeProcessing.config.config      import *
+class TrainerConfig:
+    learning_rate = 3e-4
+    betas = (0.9, 0.95)
+    grad_norm_clip = 1.0
+    weight_decay = 0.1
+    lr_decay = False
+    warmup_tokens = 375e6
+    final_tokens = 260e9
+    ckpt_path = None
+    num_workers = 0
+    config = None
+    epoch = 0
+    def __init__(self, **kwargs):
+        for k,v in kwargs.items():
+            setattr(self, k, v)
+class Trainer:
+    def __init__(self, model, train_dataset, test_dataset, config):
+        self.model = model
+        self.train_dataset = train_dataset
+        self.test_dataset = test_dataset
+        self.config = config
+        # continue train if previous model exists
+        self.train_log = init_train_log()
+        if os.path.exists(os.path.join(self.config.config.path_checkpoint,LOG_TRAIN_LATEST)):
+            self.train_log = pd.read_csv(os.path.join(self.config.config.path_checkpoint,LOG_TRAIN_LATEST))
+        self.config.epoch = self.train_log.shape[0]
+        if self.train_log.shape[0]>0:
+            self.model      = load_model(    self.config.config.path_checkpoint,self.config.epoch-1)
+            self.optimizer  = load_optimizer(self.config.config.path_checkpoint,self.config.epoch-1)
+            self.tokens     = self.train_log.loc[self.config.epoch-1,TOKENS]
+            self.scheduler  = load_scheduler(self.config.config.path_checkpoint,self.config.epoch-1)
+        else:
+            self.tokens = 0 # counter used for learning rate decay
+            self.optimizer = model.configure_optimizers(config)
+            self.scheduler = optim.lr_scheduler.StepLR(self.optimizer,
+                10,
+                0.5)
+        self.criterion = nn.CrossEntropyLoss()
+        # take over whatever gpus are on the system
+        self.device = 'cpu'
+        if torch.cuda.is_available():
+            self.device = torch.cuda.current_device()
+            self.model = torch.nn.DataParallel(self.model).to(self.device)
+    def save_checkpoint(self):
+        path_checkpoint = self.config.config.path_checkpoint
+        # DataParallel wrappers keep raw model object in .module attribute
+        raw_model = self.model.module if hasattr(self.model, "module") else self.model
+        logger.info("saving %s", path_checkpoint)
+        path_model_epoch = add_before_extension(os.path.join(path_checkpoint,
+                MODEL_LATEST),
+            str(self.config.epoch))
+        torch.save(raw_model, path_model_epoch)
+        # optimizer
+        path_optimizer_epoch = \
+            add_before_extension(
+                os.path.join(
+                    path_checkpoint,
+                    OPTIMIZER_LATEST
+                    ),
+                    str(self.config.epoch)
+                    )
+        torch.save(
+            self.optimizer,
+            path_optimizer_epoch
+            )
+        # optimizer
+        path_scheduler_epoch = \
+            add_before_extension(
+                os.path.join(
+                    path_checkpoint,
+                    SCHEDULER_LATEST
+                    ),
+                    str(self.config.epoch)
+                    )
+        torch.save(
+            self.scheduler,
+            path_scheduler_epoch
+            )
+        # train log
+        self.train_log.to_csv(
+            os.path.join(
+                path_checkpoint,
+                LOG_TRAIN_LATEST
+                )
+            ,index=False
+            )
+        path_train_log_epoch = \
+            add_before_extension(
+                os.path.join(
+                    path_checkpoint,
+                    LOG_TRAIN_LATEST
+                    ),
+                str(self.config.epoch)
+                )
+        self.train_log.to_csv(
+            path_train_log_epoch,
+            index=False)
+        # torch.save(self.token,os.path.join(path_checkpoint,'tokens_'+self.config.epoch+'.pt'))
+    def train(self):
+        model, config = self.model, self.config
+        raw_model = model.module if hasattr(self.model, "module") else model
+        optimizer = self.optimizer
+        scheduler = self.scheduler
+        while self.config.epoch < config.config.epochs and self.config.epoch != config.config.epochs:
+            current_status = dict([[a,None] for a in self.train_log.columns])
+            current_status[EPOCH] = self.config.epoch
+            time_start = time.time()
+            current_status = self.run_epoch('train',current_status)
+            current_status[TIME_ELAPSED] = int(time.time()-time_start)
+            current_status[TOKENS] = self.tokens
+            if self.test_dataset is not None:
+                current_status = self.run_epoch('test',current_status)
+            self.train_log.loc[self.config.epoch] = current_status
+            scheduler.step()
+            self.save_checkpoint()
+            self.config.epoch += 1
+    def run_epoch(self,split,current_status):
+        model = self.model
+        is_train = split == 'train'
+        model.train(is_train)
+        data = self.train_dataset if is_train else self.test_dataset
+        data.shuffle(random_state=self.config.epoch)
+        loader = DataLoader(data, shuffle=False, pin_memory=True,
+                            batch_size=self.config.config.size_batch,
+                            num_workers=self.config.num_workers)
+        losses = []
+        pbar = tqdm(enumerate(loader), total=len(loader)) if is_train else enumerate(loader)
+        for it, (x, y) in pbar:
+            # place data on the correct device
+            x = x.to(self.device)
+            y = y.to(self.device)
+            # forward the model
+            with torch.set_grad_enabled(is_train):
+                outputs,_ = model.forward(x)
+                loss = self.criterion(outputs.view(-1, outputs.shape[-1]),
+                                 y.view(-1))
+                loss = loss.mean() # collapse all losses if they are scattered on multiple gpus
+                losses.append(loss.item())
+            if is_train:
+                # backprop and update the parameters
+                model.zero_grad()
+                loss.backward()
+                torch.nn.utils.clip_grad_norm_(model.parameters(), self.config.grad_norm_clip)
+                self.optimizer.step()
+                # decay the learning rate based on our progress
+                if self.config.lr_decay:
+                    self.tokens += (y >= 0).sum() # number of tokens processed this step (i.e. label is not -100)
+                    if self.tokens < self.config.warmup_tokens:
+                        # linear warmup
+                        lr_mult = float(self.tokens) / float(max(1, self.config.warmup_tokens))
+                    else:
+                        # cosine learning rate decay
+                        progress = float(self.tokens - self.config.warmup_tokens) / float(max(1, self.config.final_tokens - self.config.warmup_tokens))
+                        lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
+                    lr = self.config.learning_rate * lr_mult
+                    for param_group in optimizer.param_groups:
+                        param_group['lr'] = lr
+                else:
+                    lr = self.config.learning_rate
+                current_status[LR] = lr
+                # report progress
+                pbar.set_description(f"epoch {self.config.epoch+1} iter {it}: train loss {loss.item():.5f}. lr {lr:e}")
+        current_status[split+'_loss'] = float(np.mean(losses))
+        if not is_train:
+            test_loss = float(np.mean(losses))
+            logger.info("test loss: %f", test_loss)
+        return current_status

SCMG/models/Reinvent/__init__.py ADDED Viewed

File without changes

SCMG/models/Reinvent/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (177 Bytes). View file

SCMG/models/Reinvent/__pycache__/model copy 2.cpython-310.pyc ADDED Viewed

Binary file (14.4 kB). View file

SCMG/models/Reinvent/__pycache__/model copy.cpython-310.pyc ADDED Viewed

Binary file (8.39 kB). View file

SCMG/models/Reinvent/__pycache__/model.cpython-310.pyc ADDED Viewed

Binary file (8.79 kB). View file

SCMG/models/Reinvent/__pycache__/sampler.cpython-310.pyc ADDED Viewed

Binary file (3.17 kB). View file

SCMG/models/Reinvent/model copy 2.py ADDED Viewed

	@@ -0,0 +1,420 @@

+import math
+import logging
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+logger = logging.getLogger(__name__)
+from SCMG.config import varables
+# class ModelConfig():
+#     rate_dropout_embedding = 0.1
+#     rate_dropout_residue = 0.1
+#     rate_dropout_attention = 0.1
+#     block_size=125
+#     def __init__(self, size_vocab, **kwargs):
+#         self.size_vocab = size_vocab
+#         for k,v in kwargs.items():
+#             setattr(self, k, v)
+class CausalSelfAttention(nn.Module):
+    """Multi-head self-attention in which a position attends only to itself and earlier positions.
+
+    All sizes come from `config`, looked up with the keys defined in `varables`.
+    """
+    def __init__(self, config):
+        super().__init__()
+        # the attention width is split evenly across the heads
+        assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+        self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+        # lower-triangular matrix of ones, sized for the longest supported sequence (SIZE_BLOCK)
+        self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+                                     .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+        self.n_head = config[varables.NUM_HEADS]
+        self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
+        self.attention_features = config[varables.DIM_ATTENTION]
+    def forward(self, x, layer_past=None):
+        """Apply masked self-attention to `x` of shape (B, T, C); `layer_past` is unused."""
+        B, T, C = x.size()
+        # project, then split the feature axis into heads: (B, n_head, T, single_head_dim)
+        k =   self.key(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
+        q = self.query(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
+        v = self.value(x).view(B, T, self.n_head,self.single_head_dim).transpose(1, 2)
+        # scaled dot-product scores: (B, n_head, T, T)
+        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
+        # positions above the diagonal get -inf, i.e. zero weight after the softmax
+        att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf'))
+        att = F.softmax(att, dim=-1)
+        att = self.dropout_attention(att)
+        y = att @ v
+        # merge the heads back into one feature axis
+        y = y.transpose(1, 2).contiguous().view(B, T, self.attention_features)
+        y = self.dropout_residue(self.projection(y))
+        return y
+class CrossAttention(nn.Module):
+    """Multi-head attention with queries from the decoder input and keys/values from the encoder input."""
+    def __init__(self, config):
+        super().__init__()
+        # the attention width is split evenly across the heads
+        assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+        self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+        self.n_head = config[varables.NUM_HEADS]
+        self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
+        self.attention_features = config[varables.DIM_ATTENTION]
+        # lower-triangular matrix of ones, sized SIZE_BLOCK x SIZE_BLOCK
+        self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+                                .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+    def forward(self, x_encoder,x_decoder, layer_past=None):
+        """Attend from `x_decoder` (B, T_decoder, C) to `x_encoder` (B, T_encoder, C); `layer_past` is unused."""
+        B_encoder, T_encoder, C_encoder = x_encoder.size()
+        B_decoder, T_decoder, C_decoder = x_decoder.size()
+        # the encoder batch size is used for all three views, so both inputs must share it
+        k = self.key(  x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
+        q = self.query(x_decoder).view(B_encoder, T_decoder, self.n_head,self.single_head_dim).transpose(1, 2)
+        v = self.value(x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
+        # scaled dot-product scores: (B, n_head, T_decoder, T_encoder)
+        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
+        # NOTE(review): the lower-triangular mask is applied here too, so decoder position i
+        # only sees encoder positions <= i -- confirm this is intended for cross-attention
+        att = att.masked_fill(self.mask[:,:,:T_decoder,:T_encoder] == 0, float('-inf'))
+        att = F.softmax(att, dim=-1)
+        att = self.dropout_attention(att)
+        y = att @ v
+        # merge the heads back into one feature axis
+        y = y.transpose(1, 2).contiguous().view(B_encoder, T_decoder, self.attention_features)
+        y = self.dropout_residue(self.projection(y))
+        return y
+class EncoderBlock(nn.Module):
+    """Pre-norm residual block: masked self-attention followed by a two-layer GELU MLP."""
+    def __init__(self, config):
+        super().__init__()
+        self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        # NOTE(review): the encoder block uses the causal attention module -- confirm intended
+        self.attn = CausalSelfAttention(config)
+        self.mlp = nn.Sequential(
+            nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
+            nn.GELU(),
+            nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
+            nn.Dropout(config[varables.RATE_DROPOUT]),
+        )
+    def forward(self, x):
+        """Return `x` after the attention and MLP residual updates."""
+         # = y_input
+        # each sub-layer works on a normalised copy and is added back to x
+        x = x + self.attn(self.ln1(x))
+        x = x + self.mlp(self.ln2(x))
+        return x
+class DecoderBlock(nn.Module):
+    """Residual block: masked self-attention, cross-attention over the encoder input, then a GELU MLP."""
+    def __init__(self, config):
+        super().__init__()
+        self.ln1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.ln2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.masked_attn = CausalSelfAttention(config)
+        self.cross_attn = CrossAttention(config)
+        self.mlp = nn.Sequential(
+            nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_FEEDFORWARD]),
+            nn.GELU(),
+            nn.Linear(config[varables.DIM_FEEDFORWARD], config[varables.DIM_EMBEDDING]),
+            nn.Dropout(config[varables.RATE_DROPOUT]),
+        )
+    def forward(self, x_encoder,x):
+        """Update the decoder state `x` using the encoder state `x_encoder`; returns the new `x`."""
+         # = y_input
+        x = x + self.masked_attn(self.ln1(x))
+        # NOTE(review): ln1 is reused for the cross-attention input (there is no third LayerNorm)
+        # and x_encoder is passed in without normalisation -- confirm intended
+        x = x + self.cross_attn(x_encoder,self.ln1(x))
+        x = x + self.mlp(self.ln2(x))
+        return x
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+class Norm(nn.Module):
+    def __init__(self, d_model, eps = 1e-6):
+        super().__init__()
+        self.size = d_model
+        # create two learnable parameters to calibrate normalisation
+        self.alpha = nn.Parameter(torch.ones(self.size))
+        self.bias = nn.Parameter(torch.zeros(self.size))
+        self.eps = eps
+    def forward(self, x):
+        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) \
+        / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
+        return norm
+def attention(q, k, v, d_k, mask=None, dropout=None):
+    scores = torch.matmul(q, k.transpose(-2, -1)) /  math.sqrt(d_k)
+    if mask is not None:
+        mask = mask.unsqueeze(1)
+        scores = scores.masked_fill(mask == 0, -1e9)
+    scores = F.softmax(scores, dim=-1)
+    if dropout is not None:
+        scores = dropout(scores)
+    output = torch.matmul(scores, v)
+    return output
+class MultiHeadAttention(nn.Module):
+    """Multi-head attention built on the module-level `attention` function.
+
+    `d_model` is split into `heads` slices of size `d_model // heads`.
+    """
+    def __init__(self, heads, d_model, dropout = 0.1):
+        super().__init__()
+        self.d_model = d_model
+        # width of one head
+        self.d_k = d_model // heads
+        self.h = heads
+        self.q_linear = nn.Linear(d_model, d_model)
+        self.v_linear = nn.Linear(d_model, d_model)
+        self.k_linear = nn.Linear(d_model, d_model)
+        self.dropout = nn.Dropout(dropout)
+        self.out = nn.Linear(d_model, d_model)
+    def forward(self, q, k, v, mask=None):
+        """Attend from `q` to `k` / `v`; the batch size is taken from `q`.
+
+        `mask` is passed on to `attention`, which inserts an axis at position 1
+        before applying it.
+        """
+        bs = q.size(0)
+        # perform linear operation and split into N heads
+        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
+        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
+        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
+        # transpose to get dimensions bs * N * sl * d_k
+        k = k.transpose(1,2)
+        q = q.transpose(1,2)
+        v = v.transpose(1,2)
+        # attention output per head (the dropout module is applied to the attention weights)
+        scores = attention(q, k, v, self.d_k, mask, self.dropout)
+        # concatenate heads and put through final linear layer
+        concat = scores.transpose(1,2).contiguous()\
+        .view(bs, -1, self.d_model)
+        output = self.out(concat)
+        return output
+class FeedForward(nn.Module):
+    def __init__(self, d_model, d_ff=2048, dropout = 0.1):
+        super().__init__()
+        # We set d_ff as a default to 2048
+        self.linear_1 = nn.Linear(d_model, d_ff)
+        self.dropout = nn.Dropout(dropout)
+        self.linear_2 = nn.Linear(d_ff, d_model)
+    def forward(self, x):
+        x = self.dropout(F.relu(self.linear_1(x)))
+        x = self.linear_2(x)
+        return x
+import torch
+import torch.nn as nn
+import copy
+class EncoderLayer(nn.Module):
+    """Pre-norm encoder layer: self-attention and feed-forward, each wrapped in dropout and a residual connection."""
+    def __init__(self, d_model, heads, dropout=0.1):
+        super().__init__()
+        self.norm_1 = Norm(d_model)
+        self.norm_2 = Norm(d_model)
+        self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
+        self.ff = FeedForward(d_model, dropout=dropout)
+        self.dropout_1 = nn.Dropout(dropout)
+        self.dropout_2 = nn.Dropout(dropout)
+    def forward(self, x, mask):
+        """Return `x` after both residual updates; `mask` is passed to the self-attention."""
+        # self-attention on the normalised input (query, key and value are the same tensor)
+        x2 = self.norm_1(x)
+        x = x + self.dropout_1(self.attn(x2,x2,x2,mask))
+        # feed-forward on the normalised result
+        x2 = self.norm_2(x)
+        x = x + self.dropout_2(self.ff(x2))
+        return x
+# build a decoder layer with two multi-head attention layers and
+# one feed-forward layer
+class DecoderLayer(nn.Module):
+    """Pre-norm decoder layer: self-attention, attention over the encoder output, feed-forward.
+
+    Each of the three sub-layers is wrapped in dropout and a residual connection.
+    """
+    def __init__(self, d_model, heads, dropout=0.1):
+        super().__init__()
+        self.norm_1 = Norm(d_model)
+        self.norm_2 = Norm(d_model)
+        self.norm_3 = Norm(d_model)
+        self.dropout_1 = nn.Dropout(dropout)
+        self.dropout_2 = nn.Dropout(dropout)
+        self.dropout_3 = nn.Dropout(dropout)
+        self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
+        self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
+        self.ff = FeedForward(d_model, dropout=dropout)
+    def forward(self, x, e_outputs, src_mask, trg_mask):
+        """Update the decoder state `x` using the encoder output `e_outputs`.
+
+        `trg_mask` is used by the self-attention, `src_mask` by the attention
+        over `e_outputs`.
+        """
+        # self-attention over the decoder input
+        x2 = self.norm_1(x)
+        x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
+        # queries from the decoder, keys and values from the encoder output
+        x2 = self.norm_2(x)
+        x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, \
+        src_mask))
+        # feed-forward
+        x2 = self.norm_3(x)
+        x = x + self.dropout_3(self.ff(x2))
+        return x
+import torch
+import torch.nn as nn
+import math
+from torch.autograd import Variable
+class Embedder(nn.Module):
+    def __init__(self, vocab_size, d_model):
+        super().__init__()
+        self.d_model = d_model
+        self.embed = nn.Embedding(vocab_size, d_model)
+    def forward(self, x):
+        return self.embed(x)
+class PositionalEncoder(nn.Module):
+    def __init__(self, d_model, max_seq_len = 200, dropout = 0.1):
+        super().__init__()
+        self.d_model = d_model
+        self.dropout = nn.Dropout(dropout)
+        # create constant 'pe' matrix with values dependant on
+        # pos and i
+        pe = torch.zeros(max_seq_len, d_model)
+        for pos in range(max_seq_len):
+            for i in range(0, d_model, 2):
+                pe[pos, i] = \
+                math.sin(pos / (10000 ** ((2 * i)/d_model)))
+                pe[pos, i + 1] = \
+                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
+        pe = pe.unsqueeze(0)
+        self.register_buffer('pe', pe)
+    def forward(self, x):
+        # make embeddings relatively larger
+        x = x * math.sqrt(self.d_model)
+        #add constant to embedding
+        seq_len = x.size(1)
+        pe = Variable(self.pe[:,:seq_len], requires_grad=False)
+        if x.is_cuda:
+            pe.cuda()
+        x = x + pe
+        return self.dropout(x)
+def get_clones(module, N):
+    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+class Encoder(nn.Module):
+    """Token embedding, positional encoding, N encoder layers and a final Norm."""
+    def __init__(self, vocab_size, d_model, N, heads, dropout):
+        super().__init__()
+        self.N = N
+        self.embed = Embedder(vocab_size, d_model)
+        self.pe = PositionalEncoder(d_model, dropout=dropout)
+        # N deep copies of one freshly built layer
+        self.layers = get_clones(EncoderLayer(d_model, heads, dropout), N)
+        self.norm = Norm(d_model)
+    def forward(self, src, mask):
+        """Encode the token ids `src`; `mask` is handed to every layer."""
+        x = self.embed(src)
+        x = self.pe(x)
+        for i in range(self.N):
+            x = self.layers[i](x, mask)
+        return self.norm(x)
+class Decoder(nn.Module):
+    """Token embedding, positional encoding, N decoder layers and a final Norm."""
+    def __init__(self, vocab_size, d_model, N, heads, dropout):
+        super().__init__()
+        self.N = N
+        self.embed = Embedder(vocab_size, d_model)
+        self.pe = PositionalEncoder(d_model, dropout=dropout)
+        # N deep copies of one freshly built layer
+        self.layers = get_clones(DecoderLayer(d_model, heads, dropout), N)
+        self.norm = Norm(d_model)
+    def forward(self, trg, e_outputs, src_mask, trg_mask):
+        """Decode the token ids `trg` against the encoder output `e_outputs`.
+
+        Both masks are handed unchanged to every layer.
+        """
+        x = self.embed(trg)
+        x = self.pe(x)
+        for i in range(self.N):
+            x = self.layers[i](x, e_outputs, src_mask, trg_mask)
+        return self.norm(x)
+class Model(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.encoder = Encoder(len(config["vocab_encoder"]), config[varables.DIM_ATTENTION], config[varables.NUM_LAYERS], config[varables.NUM_HEADS], config[varables.RATE_DROPOUT])
+        self.decoder = Decoder(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION], config[varables.NUM_LAYERS], config[varables.NUM_HEADS], config[varables.RATE_DROPOUT])
+        self.out = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]))
+        # self.tok_emb = nn.Embedding(config[varables.SIZE_VOCAB], config[varables.DIM_EMBEDDING])
+        # self.pos_emb = nn.Parameter(torch.zeros(1, config[varables.SIZE_BLOCK], config[varables.DIM_EMBEDDING]))
+        # self.drop = nn.Dropout(config[varables.RATE_DROPOUT])
+        # self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        # self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        # self.blocks = nn.Sequential(*[DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        # self.ln_f = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        # self.head = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.SIZE_VOCAB], bias=False)
+        # self.block_size = config[varables.SIZE_BLOCK]
+        # self.apply(self._init_weights)
+        # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+        self.optimizer = None
+    def get_block_size(self):
+        return self.block_size
+    def _init_weights(self, module):
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            module.weight.data.normal_(mean=0.0, std=0.02)
+            if isinstance(module, nn.Linear) and module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+    def init_optimizers(self,train_config):
+        optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+        return optimizer
+    def init_scheduler(self,train_config):
+        scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+        return scheduler
+    def get_collate_fn(self, vocab_encoder,vocab_decoder):
+        """Return a collate callable that right-pads a batch of (x, y) pairs.
+
+        The returned function takes a sequence of items whose first two entries
+        are lists of token ids, pads each side to the longest list in the batch
+        with that side's ``varables.TOKEN_PAD`` id, and returns
+        ``(x, y, boundary)`` where x and y are long tensors and boundary is -1.
+        """
+        def collate(results):
+            sources = [item[0] for item in results]
+            targets = [item[1] for item in results]
+            width_x = max(len(seq) for seq in sources)
+            width_y = max(len(seq) for seq in targets)
+            pad_x = vocab_encoder[varables.TOKEN_PAD]
+            x = torch.tensor([seq + [pad_x] * (width_x - len(seq)) for seq in sources], dtype=torch.long)
+            pad_y = vocab_decoder[varables.TOKEN_PAD]
+            y = torch.tensor([seq + [pad_y] * (width_y - len(seq)) for seq in targets], dtype=torch.long)
+            return x, y, -1
+        return collate
+    def forward(self, src, trg, trg_out, boundary=None):
+        """Run encoder and decoder and optionally compute the cross-entropy loss.
+
+        A lower-triangular mask sized by ``trg.shape[1]`` is passed to the
+        decoder; no source mask is used. Returns ``(logits, loss)`` where loss is
+        None when ``trg_out`` is None. ``boundary`` is accepted but unused.
+        """
+        trg_len = trg.shape[1]
+        causal = torch.tril(torch.ones(trg_len, trg_len))
+        trg_mask = causal.view(1, 1, trg_len, trg_len).to(trg.device)
+        src_mask = None
+        memory = self.encoder(src, src_mask)
+        decoded = self.decoder(trg, memory, src_mask, trg_mask)
+        logits = self.out(decoded)
+        if trg_out is None:
+            return logits, None
+        flat_logits = logits.view(-1, logits.size(-1))
+        loss = F.cross_entropy(flat_logits, trg_out.view(-1))
+        return logits, loss
+# mark test

SCMG/models/Reinvent/model copy.py ADDED Viewed

	@@ -0,0 +1,187 @@

+import math
+import logging
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+logger = logging.getLogger(__name__)
+from SCMG.config import varables
+# class ModelConfig():
+#     rate_dropout_embedding = 0.1
+#     rate_dropout_residue = 0.1
+#     rate_dropout_attention = 0.1
+#     block_size=125
+#     def __init__(self, size_vocab, **kwargs):
+#         self.size_vocab = size_vocab
+#         for k,v in kwargs.items():
+#             setattr(self, k, v)
+class CausalSelfAttention(nn.Module):
+    """Multi-head self-attention restricted by a lower-triangular mask.
+
+    Keys, queries and values are projected from DIM_EMBEDDING to DIM_ATTENTION,
+    split into NUM_HEADS heads, and the result is projected back to
+    DIM_EMBEDDING. The mask buffer is SIZE_BLOCK x SIZE_BLOCK.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_att = config[varables.DIM_ATTENTION]
+        heads = config[varables.NUM_HEADS]
+        assert dim_att % heads == 0
+        dim_emb = config[varables.DIM_EMBEDDING]
+        rate = config[varables.RATE_DROPOUT]
+        size = config[varables.SIZE_BLOCK]
+        self.key = nn.Linear(dim_emb, dim_att)
+        self.query = nn.Linear(dim_emb, dim_att)
+        self.value = nn.Linear(dim_emb, dim_att)
+        self.dropout_attention = nn.Dropout(rate)
+        self.dropout_residue = nn.Dropout(rate)
+        self.projection = nn.Linear(dim_att, dim_emb)
+        # Position i may only attend to positions <= i.
+        self.register_buffer("mask", torch.tril(torch.ones(size, size)).view(1, 1, size, size))
+        self.n_head = heads
+        self.single_head_dim = dim_att // heads
+        self.attention_features = dim_att
+    def forward(self, x, layer_past=None):
+        """Attend over ``x`` (unpacked as batch, time, channels); ``layer_past`` is unused."""
+        batch, steps, _ = x.size()
+        def split_heads(projected):
+            return projected.view(batch, steps, self.n_head, self.single_head_dim).transpose(1, 2)
+        k = split_heads(self.key(x))
+        q = split_heads(self.query(x))
+        v = split_heads(self.value(x))
+        scale = 1.0 / math.sqrt(k.size(-1))
+        scores = (q @ k.transpose(-2, -1)) * scale
+        hidden = self.mask[:, :, :steps, :steps] == 0
+        weights = F.softmax(scores.masked_fill(hidden, float('-inf')), dim=-1)
+        weights = self.dropout_attention(weights)
+        merged = (weights @ v).transpose(1, 2).contiguous().view(batch, steps, self.attention_features)
+        return self.dropout_residue(self.projection(merged))
+class CrossAttention(nn.Module):
+    """Multi-head attention with queries from the decoder and keys/values from the encoder.
+
+    NOTE(review): the lower-triangular mask is applied to the decoder-by-encoder
+    scores, so decoder position i only sees encoder positions <= i. Confirm this
+    is intended for cross-attention.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_att = config[varables.DIM_ATTENTION]
+        heads = config[varables.NUM_HEADS]
+        assert dim_att % heads == 0
+        dim_emb = config[varables.DIM_EMBEDDING]
+        rate = config[varables.RATE_DROPOUT]
+        size = config[varables.SIZE_BLOCK]
+        self.key = nn.Linear(dim_emb, dim_att)
+        self.query = nn.Linear(dim_emb, dim_att)
+        self.value = nn.Linear(dim_emb, dim_att)
+        self.dropout_attention = nn.Dropout(rate)
+        self.dropout_residue = nn.Dropout(rate)
+        self.projection = nn.Linear(dim_att, dim_emb)
+        self.n_head = heads
+        self.single_head_dim = dim_att // heads
+        self.attention_features = dim_att
+        self.register_buffer("mask", torch.tril(torch.ones(size, size)).view(1, 1, size, size))
+    def forward(self, x_encoder,x_decoder, layer_past=None):
+        """Return attended decoder features; ``layer_past`` is unused."""
+        # The encoder batch size is used for every reshape, as in the original.
+        batch, enc_len, _ = x_encoder.size()
+        _, dec_len, _ = x_decoder.size()
+        head_shape = (self.n_head, self.single_head_dim)
+        k = self.key(x_encoder).view(batch, enc_len, *head_shape).transpose(1, 2)
+        q = self.query(x_decoder).view(batch, dec_len, *head_shape).transpose(1, 2)
+        v = self.value(x_encoder).view(batch, enc_len, *head_shape).transpose(1, 2)
+        scale = 1.0 / math.sqrt(k.size(-1))
+        scores = (q @ k.transpose(-2, -1)) * scale
+        hidden = self.mask[:, :, :dec_len, :enc_len] == 0
+        weights = F.softmax(scores.masked_fill(hidden, float('-inf')), dim=-1)
+        weights = self.dropout_attention(weights)
+        merged = (weights @ v).transpose(1, 2).contiguous().view(batch, dec_len, self.attention_features)
+        return self.dropout_residue(self.projection(merged))
+class EncoderBlock(nn.Module):
+    """Pre-norm transformer block: self-attention then MLP, each with a residual add.
+
+    The attention layer is ``CausalSelfAttention``, so it is lower-triangular masked.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_emb = config[varables.DIM_EMBEDDING]
+        dim_ff = config[varables.DIM_FEEDFORWARD]
+        self.ln1 = nn.LayerNorm(dim_emb)
+        self.ln2 = nn.LayerNorm(dim_emb)
+        self.attn = CausalSelfAttention(config)
+        layers = [
+            nn.Linear(dim_emb, dim_ff),
+            nn.GELU(),
+            nn.Linear(dim_ff, dim_emb),
+            nn.Dropout(config[varables.RATE_DROPOUT]),
+        ]
+        self.mlp = nn.Sequential(*layers)
+    def forward(self, x):
+        """Apply both residual sub-layers to ``x`` and return the result."""
+        attended = self.attn(self.ln1(x))
+        x = x + attended
+        expanded = self.mlp(self.ln2(x))
+        return x + expanded
+class DecoderBlock(nn.Module):
+    """Pre-norm decoder block: masked self-attention, cross-attention, then MLP.
+
+    NOTE(review): ``ln1`` normalises the input of both the self-attention and the
+    cross-attention sub-layers; only the MLP uses ``ln2``. Confirm the sharing is
+    intended.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_emb = config[varables.DIM_EMBEDDING]
+        dim_ff = config[varables.DIM_FEEDFORWARD]
+        self.ln1 = nn.LayerNorm(dim_emb)
+        self.ln2 = nn.LayerNorm(dim_emb)
+        self.masked_attn = CausalSelfAttention(config)
+        self.cross_attn = CrossAttention(config)
+        layers = [
+            nn.Linear(dim_emb, dim_ff),
+            nn.GELU(),
+            nn.Linear(dim_ff, dim_emb),
+            nn.Dropout(config[varables.RATE_DROPOUT]),
+        ]
+        self.mlp = nn.Sequential(*layers)
+    def forward(self, x_encoder,x):
+        """Update decoder features ``x`` using encoder features ``x_encoder``."""
+        self_attended = self.masked_attn(self.ln1(x))
+        x = x + self_attended
+        cross_attended = self.cross_attn(x_encoder, self.ln1(x))
+        x = x + cross_attended
+        expanded = self.mlp(self.ln2(x))
+        return x + expanded
+class Model(nn.Module):
+    """Encoder-decoder transformer built from ``EncoderBlock`` / ``DecoderBlock``.
+
+    Both sides share one token embedding, one learned positional table
+    (initialised to zeros) and one final LayerNorm ``ln_f``.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_emb = config[varables.DIM_EMBEDDING]
+        vocab_size = config[varables.SIZE_VOCAB]
+        block = config[varables.SIZE_BLOCK]
+        depth = config[varables.NUM_LAYERS]
+        self.tok_emb = nn.Embedding(vocab_size, dim_emb)
+        self.pos_emb = nn.Parameter(torch.zeros(1, block, dim_emb))
+        self.drop = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(depth)])
+        self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(depth)])
+        self.ln_f = nn.LayerNorm(dim_emb)
+        self.head = nn.Linear(dim_emb, vocab_size, bias=False)
+        self.block_size = block
+        self.apply(self._init_weights)
+        total_params = sum(p.numel() for p in self.parameters())
+        logger.info("number of parameters: %e", total_params)
+        # Not set here; init_scheduler reads it, so it must be assigned before that call.
+        self.optimizer = None
+    def get_block_size(self):
+        """Return the configured SIZE_BLOCK."""
+        return self.block_size
+    def _init_weights(self, module):
+        """Initialise one submodule: N(0, 0.02) weights for Linear/Embedding, zero
+        Linear bias, and unit weight / zero bias for LayerNorm."""
+        if isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+            return
+        if not isinstance(module, (nn.Linear, nn.Embedding)):
+            return
+        module.weight.data.normal_(mean=0.0, std=0.02)
+        if isinstance(module, nn.Linear) and module.bias is not None:
+            module.bias.data.zero_()
+    def init_optimizers(self,train_config):
+        """Return a new Adam optimizer over this model's parameters (not stored on self)."""
+        learning_rate = train_config[varables.RATE_LEARNING]
+        return torch.optim.Adam(self.parameters(), lr=learning_rate)
+    def init_scheduler(self,train_config):
+        """Return a StepLR schedule bound to ``self.optimizer``."""
+        schedule_args = dict(
+            step_size=train_config[varables.SIZE_STEP],
+            gamma=train_config[varables.GAMMA],
+        )
+        return torch.optim.lr_scheduler.StepLR(self.optimizer, **schedule_args)
+    def get_collate_fn(self, vocab):
+        """Return a collate callable that right-pads (x, y) id lists with the
+        ``varables.TOKEN_PAD`` id of ``vocab`` and returns ``(x, y, -1)``."""
+        def collate(results):
+            sources = [item[0] for item in results]
+            targets = [item[1] for item in results]
+            width_x = max(len(seq) for seq in sources)
+            width_y = max(len(seq) for seq in targets)
+            pad = vocab[varables.TOKEN_PAD]
+            x = torch.tensor([seq + [pad] * (width_x - len(seq)) for seq in sources], dtype=torch.long)
+            y = torch.tensor([seq + [pad] * (width_y - len(seq)) for seq in targets], dtype=torch.long)
+            return x, y, -1
+        return collate
+    def forward(self, x_in, y_in, y_out=None,boundary=None):
+        """Encode ``x_in``, decode ``y_in`` against it and return ``(logits, loss)``.
+
+        ``loss`` is the cross-entropy against ``y_out`` or None when ``y_out`` is
+        None. ``boundary`` is accepted but unused.
+        """
+        def embed(tokens):
+            positions = self.pos_emb[:, :tokens.size()[1], :]
+            return self.drop(self.tok_emb(tokens) + positions)
+        memory = embed(x_in)
+        hidden = embed(y_in)
+        for encoder_block in self.encoder_blocks:
+            memory = encoder_block(memory)
+        memory = self.ln_f(memory)
+        for decoder_block in self.decoder_blocks:
+            hidden = decoder_block(memory, hidden)
+        logits = self.head(self.ln_f(hidden))
+        if y_out is None:
+            return logits, None
+        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y_out.view(-1))
+        return logits, loss
+# mark test

SCMG/models/Reinvent/model.py ADDED Viewed

	@@ -0,0 +1,278 @@

+import math
+import logging
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+# logger = logging.getLogger(__name__)
+from SCMG.config import varables
+from torch.autograd import Variable
+# class PositionalEncoder(nn.Module):
+#     def __init__(self, config):
+#         super().__init__()
+#         pe = torch.zeros(config[varables.SIZE_BLOCK], config[varables.DIM_ATTENTION])
+#         for pos in range(config[varables.SIZE_BLOCK]):
+#             for i in range(0, config[varables.DIM_ATTENTION], 2):
+#                 pe[pos, i] = \
+#                 math.sin(pos / (10000 ** ((2 * i)/config[varables.DIM_ATTENTION])))
+#                 pe[pos, i + 1] = \
+#                 math.cos(pos / (10000 ** ((2 * (i + 1))/config[varables.DIM_ATTENTION])))
+#         pe = pe.unsqueeze(0)
+#         self.register_buffer('pe', pe)
+#     def forward(self, T):
+#         #add constant to embedding
+#         x = Variable(self.pe[:,:T], requires_grad=False)
+#         return x
+class PositionalEncoder(nn.Module):
+    """Fixed sinusoidal positional-encoding table followed by dropout.
+
+    The table has shape (1, SIZE_BLOCK, DIM_ATTENTION) and is stored as the
+    non-trainable buffer ``pe``; sine fills the even columns and cosine the odd.
+    """
+    def __init__(self, config):
+        super(PositionalEncoder, self).__init__()
+        self.Dropout = nn.Dropout(p=config[varables.RATE_DROPOUT])
+        max_len = config[varables.SIZE_BLOCK]
+        dim = config[varables.DIM_ATTENTION]
+        pe = torch.zeros(max_len, dim)
+        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
+        div_term = torch.exp(torch.arange(0, dim, 2).float() * (-math.log(10000.0) / dim))
+        pe[:, 0::2] = torch.sin(position * div_term)
+        # An odd width has one fewer odd column than even columns; slice the
+        # frequencies so the assignment no longer fails with a shape mismatch.
+        # For an even width the slice is the whole vector, so values are unchanged.
+        pe[:, 1::2] = torch.cos(position * div_term[: dim // 2])
+        pe = pe.unsqueeze(0)
+        self.register_buffer('pe', pe)
+    def forward(self, T):
+        """Return the first ``T`` rows of the table, passed through dropout."""
+        x = self.Dropout(self.pe[:,:T, :])
+        return x
+class Attention(nn.Module):
+    """Multi-head scaled dot-product attention for self- and cross-attention.
+
+    Keys and values are computed from ``X_1`` and queries from ``X_2``; when
+    ``X_2`` is None the layer attends ``X_1`` to itself.
+    """
+    def __init__(self, config):
+        super().__init__()
+        assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+        self.Key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.Dropout_Attention = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Dropout_Residue = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+        self.NumberOfHeads = config[varables.NUM_HEADS]
+        self.DimHead = config[varables.DIM_ATTENTION] // self.NumberOfHeads
+        self.DimAttention = config[varables.DIM_ATTENTION]
+    def forward(self, X_1,X_2, mask=None):
+        """Return attended features with the time length of ``X_2``.
+
+        ``mask`` must broadcast against the (batch, heads, T_query, T_source)
+        score tensor; positions where it equals 0 are suppressed with -1e9.
+        ``mask=None`` now means "no masking"; previously the default raised
+        TypeError because ``None == 0`` is a plain bool, not a tensor.
+        """
+        if X_2 is None:
+            X_2 = X_1
+        _, T_Source, _ = X_1.size()
+        # The batch size is taken from the query input, as in the original code.
+        BatchSize, T_Query, _ = X_2.size()
+        K = self.Key(  X_1).view(BatchSize, T_Source, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        Q = self.Query(X_2).view(BatchSize, T_Query, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        V = self.Value(X_1).view(BatchSize, T_Source, self.NumberOfHeads,self.DimHead).transpose(1, 2)
+        # After the transpose K, Q, V are (batch, heads, time, head_dim).
+        ScoreAttention = (Q @ K.transpose(-2, -1)) / math.sqrt(self.DimHead)
+        if mask is not None:
+            ScoreAttention = ScoreAttention.masked_fill(mask==0, -1e9)
+        ScoreAttention = F.softmax(ScoreAttention, dim=-1)
+        ScoreAttention = self.Dropout_Attention(ScoreAttention)
+        Z = ScoreAttention @ V
+        # Merge the heads back into a single DimAttention-wide feature axis.
+        Z = Z.transpose(1, 2).contiguous().view(BatchSize, T_Query, self.DimAttention)
+        Z = self.Dropout_Residue(self.Projection(Z))
+        return Z
+class FeedForward(nn.Module):
+    """Position-wise MLP: Linear -> GELU -> Dropout -> Linear.
+
+    The hidden width is DIM_FEEDFORWARD, or 4 * DIM_ATTENTION when
+    DIM_FEEDFORWARD is configured as 0.
+    """
+    def __init__(self, config):
+        super().__init__()
+        hidden = config[varables.DIM_FEEDFORWARD]
+        if hidden == 0:
+            hidden = config[varables.DIM_ATTENTION] *4
+        dim_emb = config[varables.DIM_EMBEDDING]
+        self.Linear1 = nn.Linear(dim_emb, hidden)
+        self.GELU = nn.GELU()
+        self.Linear2 = nn.Linear(hidden, dim_emb)
+        self.Dropout = nn.Dropout(config[varables.RATE_DROPOUT])
+    def forward(self,x):
+        """Apply the MLP to ``x`` and return the result."""
+        activated = self.GELU(self.Linear1(x))
+        return self.Linear2(self.Dropout(activated))
+class EncoderBlock(nn.Module):
+    """Encoder layer: masked self-attention then feed-forward.
+
+    Each sub-layer normalises its input, adds the result to the residual stream
+    and applies dropout to the sum.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_emb = config[varables.DIM_EMBEDDING]
+        rate = config[varables.RATE_DROPOUT]
+        self.LayerNorm1 = nn.LayerNorm(dim_emb)
+        self.LayerNorm2 = nn.LayerNorm(dim_emb)
+        self.Dropout1 = nn.Dropout(rate)
+        self.Dropout2 = nn.Dropout(rate)
+        self.Attention = Attention(config)
+        self.FeedForward = FeedForward(config)
+    def forward(self, X_Encoder,Mask_Encoder):
+        """Return the updated encoder features."""
+        attended = self.Attention(self.LayerNorm1(X_Encoder), None, Mask_Encoder)
+        X_Encoder = self.Dropout1(X_Encoder + attended)
+        expanded = self.FeedForward(self.LayerNorm2(X_Encoder))
+        return self.Dropout2(X_Encoder + expanded)
+class DecoderBlock(nn.Module):
+    """Decoder layer: masked self-attention, cross-attention, then feed-forward.
+
+    Each sub-layer has its own LayerNorm and Dropout; dropout is applied to the
+    sum of the residual stream and the sub-layer output.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_emb = config[varables.DIM_EMBEDDING]
+        rate = config[varables.RATE_DROPOUT]
+        self.LayerNorm1 = nn.LayerNorm(dim_emb)
+        self.LayerNorm2 = nn.LayerNorm(dim_emb)
+        self.LayerNorm3 = nn.LayerNorm(dim_emb)
+        self.Dropout1 = nn.Dropout(rate)
+        self.Dropout2 = nn.Dropout(rate)
+        self.Dropout3 = nn.Dropout(rate)
+        self.AttentionMasked = Attention(config)
+        self.AttentionCross = Attention(config)
+        self.FeedForward = FeedForward(config)
+    def forward(self, X_Encoder,X_Decoder,Mask_Cross,Mask_Decoder):
+        """Return the updated decoder features."""
+        self_attended = self.AttentionMasked(self.LayerNorm1(X_Decoder), None, Mask_Decoder)
+        X_Decoder = self.Dropout1(X_Decoder + self_attended)
+        # Keys and values come from the encoder, queries from the normalised decoder state.
+        cross_attended = self.AttentionCross(X_Encoder, self.LayerNorm2(X_Decoder), Mask_Cross)
+        X_Decoder = self.Dropout2(X_Decoder + cross_attended)
+        expanded = self.FeedForward(self.LayerNorm3(X_Decoder))
+        return self.Dropout3(X_Decoder + expanded)
+class Model(nn.Module):
+    """Encoder-decoder transformer with separate encoder/decoder vocabularies.
+
+    Padding ids are read from ``config["Token_Padding_Encoder"]`` and
+    ``config["Token_Padding_Decoder"]``; vocabulary sizes come from the lengths of
+    ``config["vocab_encoder"]`` and ``config["vocab_decoder"]``.
+    """
+    def __init__(self, config):
+        super().__init__()
+        # Variables
+        self.Dim_Attention = config[varables.DIM_ATTENTION]
+        self.Token_Padding_Encoder = config["Token_Padding_Encoder"]
+        self.Token_Padding_Decoder = config["Token_Padding_Decoder"]
+        # Embedding and positional encoding layers
+        # NOTE(review): embeddings are DIM_ATTENTION wide while the LayerNorms and
+        # attention inputs use DIM_EMBEDDING; presumably both are configured to
+        # the same value — confirm.
+        self.Embedding_Encoder = nn.Embedding(len(config["vocab_encoder"]), config[varables.DIM_ATTENTION])
+        self.Embedding_Decoder = nn.Embedding(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION])
+        self.pos_emb = PositionalEncoder(config)
+        # Dropout and normalization layers
+        self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.LayerNorm2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        # Transformer layers
+        self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        # Output layer
+        self.head = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]), bias=False)
+        # Init
+        self.apply(self._init_weights)
+        # Not created here; init_scheduler reads this attribute.
+        self.optimizer = None
+        # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+    def _init_weights(self, module):
+        """Xavier-uniform initialise every parameter of ``module`` with more than one dimension."""
+        # Called through self.apply, which visits each submodule, while
+        # module.parameters() also yields nested parameters; a nested weight is
+        # therefore re-drawn once for every ancestor module.
+        for p in module.parameters():
+            if p.dim() > 1:
+                nn.init.xavier_uniform_(p)
+        # if isinstance(module, (nn.Linear, nn.Embedding)):
+        #     module.weight.data.normal_(mean=0.0, std=0.02)
+        #     if isinstance(module, nn.Linear) and module.bias is not None:
+        #         module.bias.data.zero_()
+        # elif isinstance(module, nn.LayerNorm):
+        #     module.bias.data.zero_()
+        #     module.weight.data.fill_(1.0)
+    def init_optimizers(self,train_config):
+        """Return a new Adam optimizer over this model's parameters (not stored on self)."""
+        optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+        return optimizer
+    def init_scheduler(self,train_config):
+        """Return a StepLR schedule bound to ``self.optimizer``."""
+        # NOTE(review): self.optimizer is None after __init__ and init_optimizers
+        # does not assign it; presumably the caller sets it first — confirm.
+        scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+        return scheduler
+    def get_collate_fn(self, vocab_encoder,vocab_decoder):
+        """Return a collate callable that right-pads each side of a batch with its
+        own ``varables.TOKEN_PAD`` id and returns ``(x, y, boundary)``."""
+        def collate(results):
+            X_Encoder = [a[0] for a in results]
+            X_Decoder = [a[1] for a in results]
+            # Constant placeholder returned as the third element of every batch.
+            boundary = -1
+            max_len_x = max([len(a) for a in X_Encoder])
+            max_len_y = max([len(a) for a in X_Decoder])
+            x = torch.tensor([(a+[vocab_encoder[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in X_Encoder],dtype=torch.long)
+            y = torch.tensor([(a+[vocab_decoder[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in X_Decoder],dtype=torch.long)
+            return x,y,boundary
+        return collate
+    def generate_masks(self,X_Encoder, X_Decoder):
+        """Build the boolean encoder, decoder and cross-attention masks from token ids."""
+        # Generate encoder, decoder, cross masks
+        # assumes X_Encoder / X_Decoder are (batch, seq) id tensors — TODO confirm
+        T = X_Decoder.shape[1]
+        # True where the token is not padding; two singleton axes are inserted
+        # before the last axis.
+        Mask_Encoder = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).unsqueeze(-2)
+        # Same padding test for the decoder, repeated T times along the third axis.
+        Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).unsqueeze(-2).repeat(1,1,T,1)
+        Mask_Cross   = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).unsqueeze(-2)
+        mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T).to(Mask_Decoder.device)
+        # Clear everything above the diagonal so a position cannot see later ones.
+        Mask_Decoder = Mask_Decoder.masked_fill(mask_tril==0,0)
+        return Mask_Encoder,Mask_Decoder,Mask_Cross
+    def forward(self, X_Encoder, X_Decoder, Y_Decoder_Ref=None,boundary=None):
+        """Return ``(logits, loss)``; loss is None when ``Y_Decoder_Ref`` is None.
+
+        ``boundary`` is accepted but unused.
+        """
+        # Masks are computed from the raw token ids, before they are embedded.
+        Mask_Encoder, Mask_Decoder,Mask_Cross = self.generate_masks(X_Encoder, X_Decoder)
+        # preprocess
+        # Embeddings are scaled by sqrt(Dim_Attention) before the positional table is added.
+        X_Encoder = self.Dropout1(self.Embedding_Encoder(X_Encoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Encoder.size(1)))
+        X_Decoder = self.Dropout2(self.Embedding_Decoder(X_Decoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Decoder.size(1)))
+        #### Now X_Encoder: BatchSize, SequenceLength, DimAttention
+        # Encoder blocks
+        for encoder_block in self.encoder_blocks:
+            X_Encoder = encoder_block(X_Encoder,Mask_Encoder)
+        X_Encoder = self.LayerNorm1(X_Encoder)
+        # Decoder blocks
+        for decoder_block in self.decoder_blocks:
+            X_Decoder = decoder_block(X_Encoder,X_Decoder,Mask_Cross,Mask_Decoder)
+        X_Decoder = self.LayerNorm2(X_Decoder)
+        Y_Decoder_Logits = self.head(X_Decoder)
+        loss = None
+        if Y_Decoder_Ref is not None:
+            # Targets equal to the decoder padding id do not contribute to the loss.
+            loss = F.cross_entropy(Y_Decoder_Logits.view(-1, Y_Decoder_Logits.size(-1)), Y_Decoder_Ref.view(-1),ignore_index=self.Token_Padding_Decoder)
+        return Y_Decoder_Logits, loss
+    # def generate_masks(self,X_Encoder, X_Decoder):
+    #     # Generate encoder, decoder, cross masks
+    #     Mask_Encoder = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).int().cpu()
+    #     Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).int().cpu()
+    #     Mask_Cross   = Mask_Decoder.unsqueeze(-1) @ Mask_Encoder.unsqueeze(-2)
+    #     Mask_Encoder = Mask_Encoder.unsqueeze(-1) @ Mask_Encoder.unsqueeze(-2)
+    #     Mask_Decoder = Mask_Decoder.unsqueeze(-1) @ Mask_Decoder.unsqueeze(-2)
+    #     T = X_Decoder.shape[1]
+    #     mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T)
+    #     Mask_Decoder = Mask_Decoder.masked_fill(mask_tril==0,0)
+    #     Mask_Encoder = Mask_Encoder.to(X_Encoder.device)
+    #     Mask_Decoder = Mask_Decoder.to(X_Decoder.device)
+    #     Mask_Cross = Mask_Cross.to(X_Encoder.device)
+    #     return Mask_Encoder,Mask_Decoder,Mask_Cross

SCMG/models/Reinvent/sampler.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import random
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+def set_seed(seed):
+    """Seed Python's ``random``, NumPy, and torch (CPU and all CUDA devices) with ``seed``."""
+    seeders = (
+        random.seed,
+        np.random.seed,
+        torch.manual_seed,
+        torch.cuda.manual_seed_all,
+    )
+    for seeder in seeders:
+        seeder(seed)
+def top_k_logits(logits, k):
+    """Return a copy of 2-D ``logits`` where, per row, every value below the
+    k-th largest is replaced by -inf. The input tensor is not modified."""
+    kth_largest = torch.topk(logits, k).values[:, -1:]
+    return logits.masked_fill(logits < kth_largest, -float('Inf'))
+@torch.no_grad()
+def sample(model, x, steps, temperature=1.0, boundary=None, *, greedy=False, top_k=None):
+    """Autoregressively append ``steps`` tokens to ``x`` and return the result.
+
+    This file previously defined ``sample`` twice; the second definition
+    silently replaced the first, leaving its top-k / greedy options unreachable.
+    They are merged here. The positional signature and the default behaviour
+    (multinomial sampling, ``boundary`` forwarded to the model) are those of the
+    definition that was actually in effect.
+
+    Args:
+        model: callable with ``get_block_size()`` and ``eval()``; called as
+            ``model(x_cond, boundary=boundary)`` and must return ``(logits, _)``.
+        x: tensor of token ids; new tokens are concatenated along dim 1.
+        steps: number of tokens to generate.
+        temperature: divisor applied to the last-position logits.
+        boundary: forwarded unchanged to the model.
+        greedy: keyword-only; pick the most probable token instead of sampling.
+        top_k: keyword-only; when set, logits are filtered with ``top_k_logits``.
+
+    Side effect: the model is switched to eval mode and left there.
+    """
+    block_size = model.get_block_size()
+    model.eval()
+    for _step in range(steps):
+        # Feed at most the last block_size tokens.
+        x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+        logits, _ = model(x_cond, boundary=boundary)
+        logits = logits[:, -1, :] / temperature
+        if top_k is not None:
+            logits = top_k_logits(logits, top_k)
+        probs = F.softmax(logits, dim=-1)
+        if greedy:
+            _, ix = torch.topk(probs, k=1, dim=-1)
+        else:
+            ix = torch.multinomial(probs, num_samples=1)
+        x = torch.cat((x, ix), dim=1)
+    return x
+# Bare string expression: it is evaluated and discarded at import time and has no runtime effect.
+# NOTE(review): looks like an example of the 'L_<i>*...' prompt text built in sample_L — confirm or remove.
+'L_5*C(=O)NCc1cccc(OC)c1.*c1nsc2ccccc12COc1cccc(CNC(=O)c2cccc(NC(=O)c3nsc4ccccc34)c2)c1'
+# for i in range(1,21):
+def sample_L(i,option='string'):
+    """Sample 32 continuations for a fixed prompt prefixed with ``'L_' + str(i)``
+    and print each decoded string followed by a 1/0 validity flag per string.
+
+    Returns None; all results are printed. ``option`` is accepted but unused.
+
+    NOTE(review): ``vocab``, ``inv``, ``model`` and ``test_valid`` are not defined
+    or imported in the visible part of this file; they must exist as module
+    globals before this is called. The tensor is created on device 'cuda'.
+    """
+    # i=2
+    prefix = 'L_'+str(i)
+    string_input = prefix + '*O=C1NN=Cc2c1cccc2.*O=C(C1CC1)N1CCNCC1'
+    array_input = [vocab[a] for a in ['<bos>'] + list(string_input)]
+    # Length of the prompt; everything generated after it is decoded below.
+    boundary = [len(array_input)]
+    tensor_input = torch.tensor(array_input,device='cuda').unsqueeze(0).repeat(32,1)
+    boundary = boundary*32
+    tensor_output = sample(model,tensor_input,250,boundary=boundary)
+    strings_output = []
+    for j in range(tensor_output.shape[0]):
+        list_string_output = [inv[a] for a in tensor_output[j,boundary[j]:].cpu().numpy() if a != vocab['<pad>']]
+        # if list_string_output[0] == '<bos>':
+        #     list_string_output = list_string_output[1:]
+        # Guard the index: the tail is empty when every generated token is padding.
+        if list_string_output and list_string_output[-1] == '<eos>':
+            list_string_output = list_string_output[:-1]
+        string_output = ''.join(list_string_output)
+        strings_output.append(string_output)
+        print(string_output)
+    for j in range(tensor_output.shape[0]):
+        if test_valid(strings_output[j]):
+            print(1)
+        else:
+            print(0)
+    # logits,_ = model(tensor_input,boundary=boundary)
+# Bare list expression: it is evaluated and discarded at import time and has no runtime effect.
+# It is a character-by-character split of an 'L_5*...' string wrapped in '<bos>' / '<eos>'.
+['<bos>', 'L', '_', '5', '*', 'C', '(', '=', 'O', ')', 'N', 'C', 'c', '1', 'c', 'c', 'c', 'c', '(', 'O', 'C', ')', 'c', '1', '.', '*', 'c', '1', 'n', 's', 'c', '2', 'c', 'c', 'c', 'c', 'c', '1', '2', 'C', 'O', 'c', '1', 'c', 'c', 'c', 'c', '(', 'C', 'N', 'C', '(', '=', 'O', ')', 'c', '2', 'c', 'c', 'c', 'c', '(', 'N', 'C', '(', '=', 'O', ')', 'c', '3', 'n', 's', 'c', '4', 'c', 'c', 'c', 'c', 'c', '3', '4', ')', 'c', '2', ')', 'c', '1', '<eos>']

SCMG/models/Reinvent_Scaffold_Decorator/__init__.py ADDED Viewed

File without changes

SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (196 Bytes). View file

SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/model copy 2.cpython-310.pyc ADDED Viewed

Binary file (14.4 kB). View file

SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/model copy.cpython-310.pyc ADDED Viewed

Binary file (8.41 kB). View file

SCMG/models/Reinvent_Scaffold_Decorator/__pycache__/sampler.cpython-310.pyc ADDED Viewed

Binary file (3.19 kB). View file

SCMG/models/Reinvent_Scaffold_Decorator/model copy 2.py ADDED Viewed

	@@ -0,0 +1,420 @@

+import math
+import logging
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+logger = logging.getLogger(__name__)
+from SCMG.config import varables
+# class ModelConfig():
+#     rate_dropout_embedding = 0.1
+#     rate_dropout_residue = 0.1
+#     rate_dropout_attention = 0.1
+#     block_size=125
+#     def __init__(self, size_vocab, **kwargs):
+#         self.size_vocab = size_vocab
+#         for k,v in kwargs.items():
+#             setattr(self, k, v)
+class CausalSelfAttention(nn.Module):
+    """Multi-head self-attention restricted by a lower-triangular mask.
+
+    Keys, queries and values are projected from DIM_EMBEDDING to DIM_ATTENTION,
+    split into NUM_HEADS heads, and the result is projected back to
+    DIM_EMBEDDING. The mask buffer is SIZE_BLOCK x SIZE_BLOCK.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_att = config[varables.DIM_ATTENTION]
+        heads = config[varables.NUM_HEADS]
+        assert dim_att % heads == 0
+        dim_emb = config[varables.DIM_EMBEDDING]
+        rate = config[varables.RATE_DROPOUT]
+        size = config[varables.SIZE_BLOCK]
+        self.key = nn.Linear(dim_emb, dim_att)
+        self.query = nn.Linear(dim_emb, dim_att)
+        self.value = nn.Linear(dim_emb, dim_att)
+        self.dropout_attention = nn.Dropout(rate)
+        self.dropout_residue = nn.Dropout(rate)
+        self.projection = nn.Linear(dim_att, dim_emb)
+        # Position i may only attend to positions <= i.
+        self.register_buffer("mask", torch.tril(torch.ones(size, size)).view(1, 1, size, size))
+        self.n_head = heads
+        self.single_head_dim = dim_att // heads
+        self.attention_features = dim_att
+    def forward(self, x, layer_past=None):
+        """Attend over ``x`` (unpacked as batch, time, channels); ``layer_past`` is unused."""
+        batch, steps, _ = x.size()
+        def split_heads(projected):
+            return projected.view(batch, steps, self.n_head, self.single_head_dim).transpose(1, 2)
+        k = split_heads(self.key(x))
+        q = split_heads(self.query(x))
+        v = split_heads(self.value(x))
+        scale = 1.0 / math.sqrt(k.size(-1))
+        scores = (q @ k.transpose(-2, -1)) * scale
+        hidden = self.mask[:, :, :steps, :steps] == 0
+        weights = F.softmax(scores.masked_fill(hidden, float('-inf')), dim=-1)
+        weights = self.dropout_attention(weights)
+        merged = (weights @ v).transpose(1, 2).contiguous().view(batch, steps, self.attention_features)
+        return self.dropout_residue(self.projection(merged))
+class CrossAttention(nn.Module):
+    """Multi-head attention with queries from the decoder and keys/values from the encoder.
+
+    NOTE(review): the lower-triangular mask is applied to the decoder-by-encoder
+    scores, so decoder position i only sees encoder positions <= i. Confirm this
+    is intended for cross-attention.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_att = config[varables.DIM_ATTENTION]
+        heads = config[varables.NUM_HEADS]
+        assert dim_att % heads == 0
+        dim_emb = config[varables.DIM_EMBEDDING]
+        rate = config[varables.RATE_DROPOUT]
+        size = config[varables.SIZE_BLOCK]
+        self.key = nn.Linear(dim_emb, dim_att)
+        self.query = nn.Linear(dim_emb, dim_att)
+        self.value = nn.Linear(dim_emb, dim_att)
+        self.dropout_attention = nn.Dropout(rate)
+        self.dropout_residue = nn.Dropout(rate)
+        self.projection = nn.Linear(dim_att, dim_emb)
+        self.n_head = heads
+        self.single_head_dim = dim_att // heads
+        self.attention_features = dim_att
+        self.register_buffer("mask", torch.tril(torch.ones(size, size)).view(1, 1, size, size))
+    def forward(self, x_encoder,x_decoder, layer_past=None):
+        """Return attended decoder features; ``layer_past`` is unused."""
+        # The encoder batch size is used for every reshape, as in the original.
+        batch, enc_len, _ = x_encoder.size()
+        _, dec_len, _ = x_decoder.size()
+        head_shape = (self.n_head, self.single_head_dim)
+        k = self.key(x_encoder).view(batch, enc_len, *head_shape).transpose(1, 2)
+        q = self.query(x_decoder).view(batch, dec_len, *head_shape).transpose(1, 2)
+        v = self.value(x_encoder).view(batch, enc_len, *head_shape).transpose(1, 2)
+        scale = 1.0 / math.sqrt(k.size(-1))
+        scores = (q @ k.transpose(-2, -1)) * scale
+        hidden = self.mask[:, :, :dec_len, :enc_len] == 0
+        weights = F.softmax(scores.masked_fill(hidden, float('-inf')), dim=-1)
+        weights = self.dropout_attention(weights)
+        merged = (weights @ v).transpose(1, 2).contiguous().view(batch, dec_len, self.attention_features)
+        return self.dropout_residue(self.projection(merged))
+class EncoderBlock(nn.Module):
+    """Pre-norm transformer block: self-attention then MLP, each with a residual add.
+
+    The attention layer is ``CausalSelfAttention``, so it is lower-triangular masked.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_emb = config[varables.DIM_EMBEDDING]
+        dim_ff = config[varables.DIM_FEEDFORWARD]
+        self.ln1 = nn.LayerNorm(dim_emb)
+        self.ln2 = nn.LayerNorm(dim_emb)
+        self.attn = CausalSelfAttention(config)
+        layers = [
+            nn.Linear(dim_emb, dim_ff),
+            nn.GELU(),
+            nn.Linear(dim_ff, dim_emb),
+            nn.Dropout(config[varables.RATE_DROPOUT]),
+        ]
+        self.mlp = nn.Sequential(*layers)
+    def forward(self, x):
+        """Apply both residual sub-layers to ``x`` and return the result."""
+        attended = self.attn(self.ln1(x))
+        x = x + attended
+        expanded = self.mlp(self.ln2(x))
+        return x + expanded
+class DecoderBlock(nn.Module):
+    """Pre-norm decoder block: masked self-attention, cross-attention, then MLP.
+
+    NOTE(review): ``ln1`` normalises the input of both the self-attention and the
+    cross-attention sub-layers; only the MLP uses ``ln2``. Confirm the sharing is
+    intended.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_emb = config[varables.DIM_EMBEDDING]
+        dim_ff = config[varables.DIM_FEEDFORWARD]
+        self.ln1 = nn.LayerNorm(dim_emb)
+        self.ln2 = nn.LayerNorm(dim_emb)
+        self.masked_attn = CausalSelfAttention(config)
+        self.cross_attn = CrossAttention(config)
+        layers = [
+            nn.Linear(dim_emb, dim_ff),
+            nn.GELU(),
+            nn.Linear(dim_ff, dim_emb),
+            nn.Dropout(config[varables.RATE_DROPOUT]),
+        ]
+        self.mlp = nn.Sequential(*layers)
+    def forward(self, x_encoder,x):
+        """Update decoder features ``x`` using encoder features ``x_encoder``."""
+        self_attended = self.masked_attn(self.ln1(x))
+        x = x + self_attended
+        cross_attended = self.cross_attn(x_encoder, self.ln1(x))
+        x = x + cross_attended
+        expanded = self.mlp(self.ln2(x))
+        return x + expanded
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+class Norm(nn.Module):
+    """Normalise the last dimension, then apply a learnable scale and shift."""
+    def __init__(self, d_model, eps = 1e-6):
+        super().__init__()
+        self.size = d_model
+        # learnable gain (alpha) and offset (bias), one value per feature
+        self.alpha = nn.Parameter(torch.ones(d_model))
+        self.bias = nn.Parameter(torch.zeros(d_model))
+        self.eps = eps
+    def forward(self, x):
+        """Return ``alpha * (x - mean) / (std + eps) + bias`` over the last axis."""
+        centred = x - x.mean(dim=-1, keepdim=True)
+        # eps is added to the standard deviation itself to avoid dividing by zero
+        spread = x.std(dim=-1, keepdim=True) + self.eps
+        return self.alpha * centred / spread + self.bias
+def attention(q, k, v, d_k, mask=None, dropout=None):
+    """Scaled dot-product attention.
+
+    :param q, k, v: query, key and value tensors; the last two axes of ``q``
+        and ``k`` are matrix-multiplied.
+    :param d_k: per-head feature size used for the ``1/sqrt(d_k)`` scaling.
+    :param mask: optional tensor; an axis is inserted at position 1 and
+        positions where it equals 0 are given a score of -1e9 before softmax.
+    :param dropout: optional callable applied to the attention weights.
+    :return: the attention-weighted combination of ``v``.
+    """
+    weights = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
+    if mask is not None:
+        # insert the axis at position 1 so the mask broadcasts over the heads
+        weights = weights.masked_fill(mask.unsqueeze(1) == 0, -1e9)
+    weights = F.softmax(weights, dim=-1)
+    if dropout is not None:
+        weights = dropout(weights)
+    return torch.matmul(weights, v)
+class MultiHeadAttention(nn.Module):
+    """Multi-head attention built on the module-level ``attention`` function."""
+    def __init__(self, heads, d_model, dropout = 0.1):
+        super().__init__()
+        self.d_model = d_model
+        self.d_k = d_model // heads
+        self.h = heads
+        # input projections (creation order kept: query, value, key)
+        self.q_linear = nn.Linear(d_model, d_model)
+        self.v_linear = nn.Linear(d_model, d_model)
+        self.k_linear = nn.Linear(d_model, d_model)
+        self.dropout = nn.Dropout(dropout)
+        self.out = nn.Linear(d_model, d_model)
+    def forward(self, q, k, v, mask=None):
+        """Project q/k/v, attend per head, merge the heads and project the result."""
+        bs = q.size(0)
+        def split_heads(projected):
+            # (bs, seq, d_model) -> (bs, heads, seq, d_k)
+            return projected.view(bs, -1, self.h, self.d_k).transpose(1, 2)
+        k = split_heads(self.k_linear(k))
+        q = split_heads(self.q_linear(q))
+        v = split_heads(self.v_linear(v))
+        per_head = attention(q, k, v, self.d_k, mask, self.dropout)
+        # (bs, heads, seq, d_k) -> (bs, seq, d_model)
+        merged = per_head.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
+        return self.out(merged)
+class FeedForward(nn.Module):
+    """Two-layer feed-forward network: Linear -> ReLU -> Dropout -> Linear."""
+    def __init__(self, d_model, d_ff=2048, dropout = 0.1):
+        super().__init__()
+        # d_ff (hidden width) defaults to 2048
+        self.linear_1 = nn.Linear(d_model, d_ff)
+        self.dropout = nn.Dropout(dropout)
+        self.linear_2 = nn.Linear(d_ff, d_model)
+    def forward(self, x):
+        """Map ``x`` from d_model to d_ff and back to d_model."""
+        hidden = F.relu(self.linear_1(x))
+        hidden = self.dropout(hidden)
+        return self.linear_2(hidden)
+import torch
+import torch.nn as nn
+import copy
+class EncoderLayer(nn.Module):
+    """Encoder layer: self-attention and feed-forward, each pre-normed and residual."""
+    def __init__(self, d_model, heads, dropout=0.1):
+        super().__init__()
+        self.norm_1 = Norm(d_model)
+        self.norm_2 = Norm(d_model)
+        self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
+        self.ff = FeedForward(d_model, dropout=dropout)
+        self.dropout_1 = nn.Dropout(dropout)
+        self.dropout_2 = nn.Dropout(dropout)
+    def forward(self, x, mask):
+        """Apply masked self-attention and then the feed-forward network to ``x``."""
+        normed = self.norm_1(x)
+        attended = self.attn(normed, normed, normed, mask)
+        x = x + self.dropout_1(attended)
+        transformed = self.ff(self.norm_2(x))
+        return x + self.dropout_2(transformed)
+# build a decoder layer with two multi-head attention layers and
+# one feed-forward layer
+class DecoderLayer(nn.Module):
+    """Decoder layer: self-attention, encoder/decoder attention and feed-forward.
+
+    Each of the three sub-layers is pre-normed, followed by dropout and added
+    back onto its input.
+    """
+    def __init__(self, d_model, heads, dropout=0.1):
+        super().__init__()
+        self.norm_1 = Norm(d_model)
+        self.norm_2 = Norm(d_model)
+        self.norm_3 = Norm(d_model)
+        self.dropout_1 = nn.Dropout(dropout)
+        self.dropout_2 = nn.Dropout(dropout)
+        self.dropout_3 = nn.Dropout(dropout)
+        self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
+        self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
+        self.ff = FeedForward(d_model, dropout=dropout)
+    def forward(self, x, e_outputs, src_mask, trg_mask):
+        """Update target states ``x`` using the encoder outputs ``e_outputs``."""
+        # 1) self-attention over the target sequence, restricted by trg_mask
+        normed = self.norm_1(x)
+        self_attended = self.attn_1(normed, normed, normed, trg_mask)
+        x = x + self.dropout_1(self_attended)
+        # 2) queries from the target, keys/values from the encoder outputs
+        normed = self.norm_2(x)
+        cross_attended = self.attn_2(normed, e_outputs, e_outputs, src_mask)
+        x = x + self.dropout_2(cross_attended)
+        # 3) position-wise feed-forward network
+        transformed = self.ff(self.norm_3(x))
+        return x + self.dropout_3(transformed)
+import torch
+import torch.nn as nn
+import math
+from torch.autograd import Variable
+class Embedder(nn.Module):
+    """Thin wrapper around ``nn.Embedding`` that also records ``d_model``."""
+    def __init__(self, vocab_size, d_model):
+        super().__init__()
+        self.d_model = d_model
+        self.embed = nn.Embedding(vocab_size, d_model)
+    def forward(self, x):
+        """Look up the embedding vector of every token index in ``x``."""
+        embedded = self.embed(x)
+        return embedded
+class PositionalEncoder(nn.Module):
+    """Add a fixed sinusoidal position table to scaled embeddings, then dropout.
+
+    :param d_model: embedding width (odd values are supported).
+    :param max_seq_len: number of positions in the precomputed table.
+    :param dropout: dropout probability applied to the summed result.
+    """
+    def __init__(self, d_model, max_seq_len = 200, dropout = 0.1):
+        super().__init__()
+        self.d_model = d_model
+        self.dropout = nn.Dropout(dropout)
+        # Build the constant 'pe' table in one vectorised pass (float64 for
+        # accuracy, cast down at the end). Column c uses the angle
+        # pos / 10000 ** (2 * c / d_model): sine on even columns, cosine on odd.
+        # NOTE(review): this exponent is the formula this file has always used;
+        # it is not the 2i/d_model-per-sin/cos-pair form of "Attention Is All
+        # You Need". It is kept unchanged so freshly built tables match
+        # previously trained models.
+        positions = torch.arange(max_seq_len, dtype=torch.float64).unsqueeze(1)
+        columns = torch.arange(d_model, dtype=torch.float64)
+        angles = positions / (10000.0 ** (2 * columns / d_model))
+        pe = torch.zeros(max_seq_len, d_model, dtype=torch.float64)
+        pe[:, 0::2] = torch.sin(angles[:, 0::2])
+        # slicing (rather than indexing i + 1) keeps odd d_model from overflowing
+        pe[:, 1::2] = torch.cos(angles[:, 1::2])
+        pe = pe.to(torch.get_default_dtype()).unsqueeze(0)
+        # A buffer follows the module across .to()/.cuda(), so forward() needs
+        # no manual device handling.
+        self.register_buffer('pe', pe)
+    def forward(self, x):
+        """Scale ``x`` by sqrt(d_model), add the first ``x.size(1)`` table rows, apply dropout."""
+        # make embeddings relatively larger than the positional signal
+        x = x * math.sqrt(self.d_model)
+        seq_len = x.size(1)
+        x = x + self.pe[:, :seq_len]
+        return self.dropout(x)
+def get_clones(module, N):
+    """Return an ``nn.ModuleList`` holding ``N`` independent deep copies of ``module``."""
+    clones = nn.ModuleList()
+    for _ in range(N):
+        clones.append(copy.deepcopy(module))
+    return clones
+class Encoder(nn.Module):
+    """Stack of ``N`` encoder layers over embedded, position-encoded tokens."""
+    def __init__(self, vocab_size, d_model, N, heads, dropout):
+        super().__init__()
+        self.N = N
+        self.embed = Embedder(vocab_size, d_model)
+        self.pe = PositionalEncoder(d_model, dropout=dropout)
+        self.layers = get_clones(EncoderLayer(d_model, heads, dropout), N)
+        self.norm = Norm(d_model)
+    def forward(self, src, mask):
+        """Encode token indices ``src``; ``mask`` is forwarded to every layer."""
+        hidden = self.pe(self.embed(src))
+        for index in range(self.N):
+            layer = self.layers[index]
+            hidden = layer(hidden, mask)
+        return self.norm(hidden)
+class Decoder(nn.Module):
+    """Stack of ``N`` decoder layers over embedded, position-encoded target tokens."""
+    def __init__(self, vocab_size, d_model, N, heads, dropout):
+        super().__init__()
+        self.N = N
+        self.embed = Embedder(vocab_size, d_model)
+        self.pe = PositionalEncoder(d_model, dropout=dropout)
+        self.layers = get_clones(DecoderLayer(d_model, heads, dropout), N)
+        self.norm = Norm(d_model)
+    def forward(self, trg, e_outputs, src_mask, trg_mask):
+        """Decode target indices ``trg`` against the encoder outputs ``e_outputs``."""
+        hidden = self.pe(self.embed(trg))
+        for index in range(self.N):
+            layer = self.layers[index]
+            hidden = layer(hidden, e_outputs, src_mask, trg_mask)
+        return self.norm(hidden)
+class Model(nn.Module):
+    """Encoder/decoder transformer with a linear head over the decoder vocabulary.
+
+    ``config`` is indexed with ``"vocab_encoder"``, ``"vocab_decoder"`` and the
+    ``varables`` keys DIM_ATTENTION, NUM_LAYERS, NUM_HEADS and RATE_DROPOUT.
+    """
+    def __init__(self, config):
+        super().__init__()
+        self.encoder = Encoder(len(config["vocab_encoder"]), config[varables.DIM_ATTENTION], config[varables.NUM_LAYERS], config[varables.NUM_HEADS], config[varables.RATE_DROPOUT])
+        self.decoder = Decoder(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION], config[varables.NUM_LAYERS], config[varables.NUM_HEADS], config[varables.RATE_DROPOUT])
+        self.out = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]))
+        # Set by the caller before init_scheduler() is used.
+        self.optimizer = None
+    def get_block_size(self):
+        """Return ``self.block_size``.
+
+        NOTE(review): ``block_size`` is never assigned in ``__init__``, so this
+        raises AttributeError unless the caller sets it first -- confirm the
+        intended value (the positional table defaults to 200 positions).
+        """
+        return self.block_size
+    def _init_weights(self, module):
+        """Initialise Linear/Embedding weights to N(0, 0.02) and reset LayerNorm.
+
+        Not applied by ``__init__``; available for ``self.apply(...)``.
+        """
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            module.weight.data.normal_(mean=0.0, std=0.02)
+            if isinstance(module, nn.Linear) and module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+    def init_optimizers(self,train_config):
+        """Return an Adam optimizer over all parameters (it is not stored on ``self``)."""
+        optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+        return optimizer
+    def init_scheduler(self,train_config):
+        """Return a StepLR scheduler bound to ``self.optimizer``.
+
+        NOTE(review): ``self.optimizer`` is None until the caller assigns it.
+        """
+        scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+        return scheduler
+    def get_collate_fn(self, vocab_encoder,vocab_decoder):
+        """Return a collate function that right-pads each batch with TOKEN_PAD.
+
+        The returned function maps a list of ``(x_tokens, y_tokens)`` pairs to
+        ``(x, y, boundary)`` where ``x``/``y`` are long tensors padded to the
+        longest sequence in the batch and ``boundary`` is always -1.
+        """
+        pad_x = vocab_encoder[varables.TOKEN_PAD]
+        pad_y = vocab_decoder[varables.TOKEN_PAD]
+        def collate(results):
+            x_in = [a[0] for a in results]
+            y_in = [a[1] for a in results]
+            boundary = -1
+            max_len_x = max(len(a) for a in x_in)
+            max_len_y = max(len(a) for a in y_in)
+            x = torch.tensor([a + [pad_x] * (max_len_x - len(a)) for a in x_in],dtype=torch.long)
+            y = torch.tensor([a + [pad_y] * (max_len_y - len(a)) for a in y_in],dtype=torch.long)
+            return x,y,boundary
+        return collate
+    def forward(self, src, trg, trg_out, boundary=None):
+        """Run the encoder and decoder and optionally compute the token loss.
+
+        :param src: encoder token indices.
+        :param trg: decoder input token indices; ``trg.shape[1]`` is the target length.
+        :param trg_out: reference indices for the loss, or None to skip it.
+        :param boundary: unused.
+        :return: ``(logits, loss)``; ``loss`` is None when ``trg_out`` is None.
+        """
+        src_mask = None
+        target_len = trg.shape[1]
+        # Causal mask of shape (1, T, T). attention() inserts the head axis
+        # itself with mask.unsqueeze(1); a (1, 1, T, T) mask would become 5-D
+        # there, broadcast the scores to 5-D and scramble batch and head axes
+        # when the heads are merged.
+        trg_mask = torch.tril(torch.ones(target_len, target_len, device=trg.device)).view(1, target_len, target_len)
+        e_outputs = self.encoder(src, src_mask)
+        d_output = self.decoder(trg, e_outputs, src_mask, trg_mask)
+        logits = self.out(d_output)
+        loss = None
+        if trg_out is not None:
+            # NOTE(review): padded positions are included in the loss
+            # (no ignore_index) -- confirm whether that is intended.
+            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), trg_out.view(-1))
+        return logits, loss
+# mark test

SCMG/models/Reinvent_Scaffold_Decorator/model copy.py ADDED Viewed

	@@ -0,0 +1,187 @@

+import math
+import logging
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+logger = logging.getLogger(__name__)
+from SCMG.config import varables
+# class ModelConfig():
+#     rate_dropout_embedding = 0.1
+#     rate_dropout_residue = 0.1
+#     rate_dropout_attention = 0.1
+#     block_size=125
+#     def __init__(self, size_vocab, **kwargs):
+#         self.size_vocab = size_vocab
+#         for k,v in kwargs.items():
+#             setattr(self, k, v)
+class CausalSelfAttention(nn.Module):
+    """Multi-head self-attention restricted by a lower-triangular mask.
+
+    Inputs of width DIM_EMBEDDING are projected to DIM_ATTENTION, split into
+    NUM_HEADS heads, attended, and projected back to DIM_EMBEDDING.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_embedding = config[varables.DIM_EMBEDDING]
+        dim_attention = config[varables.DIM_ATTENTION]
+        block = config[varables.SIZE_BLOCK]
+        assert dim_attention % config[varables.NUM_HEADS] == 0
+        self.key = nn.Linear(dim_embedding, dim_attention)
+        self.query = nn.Linear(dim_embedding, dim_attention)
+        self.value = nn.Linear(dim_embedding, dim_attention)
+        self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.projection = nn.Linear(dim_attention, dim_embedding)
+        # (1, 1, block, block) lower-triangular mask: position t sees positions <= t
+        causal = torch.tril(torch.ones(block, block)).view(1, 1, block, block)
+        self.register_buffer("mask", causal)
+        self.n_head = config[varables.NUM_HEADS]
+        self.single_head_dim = dim_attention // self.n_head
+        self.attention_features = dim_attention
+    def forward(self, x, layer_past=None):
+        """Attend over ``x`` (three axes: batch, time, features); ``layer_past`` is unused."""
+        batch, steps, _ = x.size()
+        def split_heads(projected):
+            # (batch, steps, features) -> (batch, heads, steps, head_dim)
+            return projected.view(batch, steps, self.n_head, self.single_head_dim).transpose(1, 2)
+        k = split_heads(self.key(x))
+        q = split_heads(self.query(x))
+        v = split_heads(self.value(x))
+        scores = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
+        visible = self.mask[:, :, :steps, :steps]
+        scores = scores.masked_fill(visible == 0, float('-inf'))
+        weights = self.dropout_attention(F.softmax(scores, dim=-1))
+        merged = (weights @ v).transpose(1, 2).contiguous().view(batch, steps, self.attention_features)
+        return self.dropout_residue(self.projection(merged))
+class CrossAttention(nn.Module):
+    """Multi-head encoder/decoder attention.
+
+    Queries come from the decoder states, keys and values from the encoder
+    states. Every decoder position may attend to every encoder position.
+    """
+    def __init__(self, config):
+        super().__init__()
+        assert config[varables.DIM_ATTENTION] % config[varables.NUM_HEADS] == 0
+        self.key = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.query = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.value = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.DIM_ATTENTION])
+        self.dropout_attention = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.dropout_residue = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.projection = nn.Linear(config[varables.DIM_ATTENTION], config[varables.DIM_EMBEDDING])
+        self.n_head = config[varables.NUM_HEADS]
+        self.single_head_dim = config[varables.DIM_ATTENTION] // self.n_head
+        self.attention_features = config[varables.DIM_ATTENTION]
+        # Kept only so existing state_dicts (which contain "mask") still load.
+        # It is deliberately NOT applied in forward(): a lower-triangular mask
+        # is a self-attention concept and would hide encoder positions > t
+        # from decoder step t.
+        self.register_buffer("mask", torch.tril(torch.ones(config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+                                .view(1, 1, config[varables.SIZE_BLOCK], config[varables.SIZE_BLOCK]))
+    def forward(self, x_encoder,x_decoder, layer_past=None):
+        """Attend from decoder states to encoder states.
+
+        :param x_encoder: encoder states with three axes (batch, T_encoder, features).
+        :param x_decoder: decoder states with three axes (batch, T_decoder, features).
+        :param layer_past: unused.
+        :return: tensor with the decoder's batch and time axes and the width
+            produced by ``self.projection``.
+        """
+        B_encoder, T_encoder, C_encoder = x_encoder.size()
+        B_decoder, T_decoder, C_decoder = x_decoder.size()
+        k = self.key(  x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
+        # the query and the output follow the decoder's batch size
+        q = self.query(x_decoder).view(B_decoder, T_decoder, self.n_head,self.single_head_dim).transpose(1, 2)
+        v = self.value(x_encoder).view(B_encoder, T_encoder, self.n_head,self.single_head_dim).transpose(1, 2)
+        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
+        # NOTE(review): encoder padding is not masked here; no padding mask is
+        # passed into this module.
+        att = F.softmax(att, dim=-1)
+        att = self.dropout_attention(att)
+        y = att @ v
+        y = y.transpose(1, 2).contiguous().view(B_decoder, T_decoder, self.attention_features)
+        y = self.dropout_residue(self.projection(y))
+        return y
+class EncoderBlock(nn.Module):
+    """Transformer block: an attention sub-layer followed by an MLP sub-layer.
+
+    Each sub-layer reads a LayerNorm-ed copy of its input and its output is
+    added back onto that input (pre-norm residual layout).
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_embedding = config[varables.DIM_EMBEDDING]
+        dim_feedforward = config[varables.DIM_FEEDFORWARD]
+        self.ln1 = nn.LayerNorm(dim_embedding)
+        self.ln2 = nn.LayerNorm(dim_embedding)
+        self.attn = CausalSelfAttention(config)
+        # Widen to the feed-forward size, apply GELU, project back, then dropout.
+        widen = nn.Linear(dim_embedding, dim_feedforward)
+        narrow = nn.Linear(dim_feedforward, dim_embedding)
+        self.mlp = nn.Sequential(widen, nn.GELU(), narrow, nn.Dropout(config[varables.RATE_DROPOUT]))
+    def forward(self, x):
+        """Return ``x`` after the attention and MLP residual updates."""
+        attended = x + self.attn(self.ln1(x))
+        return attended + self.mlp(self.ln2(attended))
+class DecoderBlock(nn.Module):
+    """Decoder block: masked self-attention, cross-attention, then an MLP.
+
+    All three sub-layers are residual; the MLP reads ``ln2`` while both
+    attention sub-layers read ``ln1``.
+    """
+    def __init__(self, config):
+        super().__init__()
+        dim_embedding = config[varables.DIM_EMBEDDING]
+        dim_feedforward = config[varables.DIM_FEEDFORWARD]
+        self.ln1 = nn.LayerNorm(dim_embedding)
+        self.ln2 = nn.LayerNorm(dim_embedding)
+        self.masked_attn = CausalSelfAttention(config)
+        self.cross_attn = CrossAttention(config)
+        # Widen to the feed-forward size, apply GELU, project back, then dropout.
+        widen = nn.Linear(dim_embedding, dim_feedforward)
+        narrow = nn.Linear(dim_feedforward, dim_embedding)
+        self.mlp = nn.Sequential(widen, nn.GELU(), narrow, nn.Dropout(config[varables.RATE_DROPOUT]))
+    def forward(self, x_encoder,x):
+        """Update decoder states ``x`` using the encoder states ``x_encoder``."""
+        after_self = x + self.masked_attn(self.ln1(x))
+        # NOTE(review): the cross-attention query is normalised with ln1 again
+        # (there is no dedicated LayerNorm for this sub-layer) -- confirm this
+        # is intended.
+        after_cross = after_self + self.cross_attn(x_encoder, self.ln1(after_self))
+        return after_cross + self.mlp(self.ln2(after_cross))
+class Model(nn.Module):
+    """Encoder/decoder transformer sharing one token embedding and one vocabulary.
+
+    ``config`` is indexed with the ``varables`` keys SIZE_VOCAB, DIM_EMBEDDING,
+    SIZE_BLOCK, RATE_DROPOUT and NUM_LAYERS (the blocks read further keys).
+    """
+    def __init__(self, config):
+        super().__init__()
+        # The same token embedding is used for encoder and decoder inputs.
+        self.tok_emb = nn.Embedding(config[varables.SIZE_VOCAB], config[varables.DIM_EMBEDDING])
+        # Learned positional embedding, zero-initialised, one row per position up to SIZE_BLOCK.
+        self.pos_emb = nn.Parameter(torch.zeros(1, config[varables.SIZE_BLOCK], config[varables.DIM_EMBEDDING]))
+        self.drop = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        # self.blocks = nn.Sequential(*[DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        # One final LayerNorm, applied to both the encoder and the decoder output in forward().
+        self.ln_f = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.head = nn.Linear(config[varables.DIM_EMBEDDING], config[varables.SIZE_VOCAB], bias=False)
+        self.block_size = config[varables.SIZE_BLOCK]
+        # Re-initialise every sub-module created above.
+        self.apply(self._init_weights)
+        logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+        # Set by the caller before init_scheduler() is used.
+        self.optimizer = None
+    def get_block_size(self):
+        """Return the SIZE_BLOCK value stored at construction."""
+        return self.block_size
+    def _init_weights(self, module):
+        """Initialise Linear/Embedding weights to N(0, 0.02), zero Linear biases, reset LayerNorm."""
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            module.weight.data.normal_(mean=0.0, std=0.02)
+            if isinstance(module, nn.Linear) and module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+    def init_optimizers(self,train_config):
+        """Return an Adam optimizer over all parameters (it is not stored on ``self``)."""
+        optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+        return optimizer
+    def init_scheduler(self,train_config):
+        """Return a StepLR scheduler bound to ``self.optimizer``.
+
+        NOTE(review): ``self.optimizer`` is None until the caller assigns it.
+        """
+        scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+        return scheduler
+    def get_collate_fn(self, vocab):
+        """Return a collate function that right-pads each batch with ``vocab[TOKEN_PAD]``.
+
+        The returned function maps a list of ``(x_tokens, y_tokens)`` pairs to
+        ``(x, y, boundary)``; ``boundary`` is always -1.
+        """
+        def collate(results):
+            x_in = [a[0] for a in results]
+            y_in = [a[1] for a in results]
+            boundary = -1
+            # Pad every sequence to the longest one in this batch.
+            max_len_x = max([len(a) for a in x_in])
+            max_len_y = max([len(a) for a in y_in])
+            x = torch.tensor([(a+[vocab[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in x_in],dtype=torch.long)
+            y = torch.tensor([(a+[vocab[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in y_in],dtype=torch.long)
+            return x,y,boundary
+        return collate
+    def forward(self, x_in, y_in, y_out=None,boundary=None):
+        """Encode ``x_in``, decode ``y_in`` against it and optionally compute the loss.
+
+        Returns ``(logits, loss)``; ``loss`` is None when ``y_out`` is None.
+        ``boundary`` is unused.
+        """
+        # Token embedding plus the first seq_len rows of the positional table.
+        x_in = self.drop(self.tok_emb(x_in) + self.pos_emb[:, :x_in.size()[1], :])
+        y_in = self.drop(self.tok_emb(y_in) + self.pos_emb[:, :y_in.size()[1], :])
+        #
+        for encoder_block in self.encoder_blocks:
+            x_in = encoder_block(x_in)
+        x_in = self.ln_f(x_in)
+        # Every decoder block receives the same final encoder output.
+        for decoder_block in self.decoder_blocks:
+            y_in = decoder_block(x_in,y_in)
+        y_in = self.ln_f(y_in)
+        logits = self.head(y_in)
+        loss = None
+        if y_out is not None:
+            # NOTE(review): no ignore_index is passed, so padded targets count towards the loss.
+            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y_out.view(-1))
+        return logits, loss
+# mark test

SCMG/models/Reinvent_Scaffold_Decorator/model.py ADDED Viewed

	@@ -0,0 +1,276 @@

+# Provenance: copied from the GitHub repository undeadpixel/reinvent-scaffold-decorator,
+# file reinvent-scaffold-decorator/models/model.py
+# (latest commit 37d0a8a on May 8, 2020 by Arús-Pous, Josep: "updated to revised version";
+# 136 lines (118 sloc), 5.75 KB).
+"""
+Model class.
+"""
+import torch
+import torch.nn as tnn
+import models.decorator as mdec
+class DecoratorModel:
+    def __init__(self, vocabulary, decorator, max_sequence_length=256, no_cuda=False, mode="train"):
+        """
+        Implements the likelihood and sampling functions of the decorator model.
+        :param vocabulary: A DecoratorVocabulary instance with the vocabularies of both the encoder and decoder.
+        :param decorator: A decorator network instance.
+        :param max_sequence_length: Maximum number of tokens allowed to sample.
+        :param no_cuda: Forces the model not to use CUDA, even if it is available.
+        :param mode: Mode in which the model should be initialized.
+        :return:
+        """
+        self.vocabulary = vocabulary
+        self.max_sequence_length = max_sequence_length
+        self.network = decorator
+        if torch.cuda.is_available() and not no_cuda:
+            self.network.cuda()
+        # index 0 is the padding token and never contributes to the NLL
+        self._nll_loss = tnn.NLLLoss(reduction="none", ignore_index=0)
+        self.set_mode(mode)
+    @classmethod
+    def load_from_file(cls, path, mode="train"):
+        """
+        Loads a model from a single file
+        :param path: Path to the saved model.
+        :param mode: Mode in which the model should be initialized.
+        :return: An instance of the model.
+        """
+        # NOTE: torch.load unpickles arbitrary objects; only load trusted files.
+        # Map storages to the CPU when CUDA is unavailable so that checkpoints
+        # saved on a GPU machine can still be opened.
+        map_location = None if torch.cuda.is_available() else "cpu"
+        data = torch.load(path, map_location=map_location)
+        decorator = mdec.Decorator(**data["decorator"]["params"])
+        decorator.load_state_dict(data["decorator"]["state"])
+        model = DecoratorModel(
+            decorator=decorator,
+            mode=mode,
+            **data["model"]
+        )
+        return model
+    def save(self, path):
+        """
+        Saves the model to a file.
+        :param path: Path to the file which the model will be saved to.
+        """
+        save_dict = {
+            'model': {
+                'vocabulary': self.vocabulary,
+                'max_sequence_length': self.max_sequence_length
+            },
+            'decorator': {
+                'params': self.network.get_params(),
+                'state': self.network.state_dict()
+            }
+        }
+        torch.save(save_dict, path)
+    def set_mode(self, mode):
+        """
+        Changes the mode of the network to training or eval.
+        :param mode: "sampling" or "eval" select eval mode; anything else selects training mode.
+        :return: The model instance.
+        """
+        if mode in ("sampling", "eval"):
+            self.network.eval()
+        else:
+            self.network.train()
+        return self
+    def likelihood(self, scaffold_seqs, scaffold_seq_lengths, decoration_seqs, decoration_seq_lengths, with_attention_weights=False):
+        """
+        Retrieves the likelihood of a scaffold and its respective decorations.
+        :param scaffold_seqs: (batch, seq) A batch of padded scaffold sequences.
+        :param scaffold_seq_lengths: The length of the scaffold sequences (for packing purposes).
+        :param decoration_seqs: (batch, seq) A batch of decorator sequences.
+        :param decoration_seq_lengths: The length of the decorator sequences (for packing purposes).
+        :param with_attention_weights: If True, also return the attention weights produced by the network.
+        :return:  (batch) Negative log likelihood for each item in the batch
+                  (plus the attention weights when requested).
+        """
+        # NOTE: the decoration_seq_lengths have a - 1 to prevent the end token to be forward-passed.
+        logits, attention_weights = self.network(scaffold_seqs, scaffold_seq_lengths, decoration_seqs,
+                                                 decoration_seq_lengths - 1)  # (batch, seq - 1, voc)
+        log_probs = logits.log_softmax(dim=2).transpose(1, 2)  # (batch, voc, seq - 1)
+        nlls = self._nll_loss(log_probs, decoration_seqs[:, 1:]).sum(dim=1)  # (batch)
+        if with_attention_weights:
+            return nlls, attention_weights
+        return nlls
+    @torch.no_grad()
+    def sample_decorations(self, scaffold_seqs, scaffold_seq_lengths):
+        """
+        Samples as many decorations as scaffolds in the tensor.
+        :param scaffold_seqs: A tensor with the scaffolds to sample already encoded and padded.
+        :param scaffold_seq_lengths: A tensor with the length of the scaffolds.
+        :return: An iterator with (scaffold_smi, decoration_smi, nll) triplets.
+        """
+        # Work on whatever device the scaffolds live on instead of forcing
+        # CUDA, so sampling also runs with no_cuda=True or without a GPU.
+        device = scaffold_seqs.device
+        batch_size = scaffold_seqs.size(0)
+        input_vector = torch.full(
+            (batch_size, 1), self.vocabulary.decoration_vocabulary["^"], dtype=torch.long, device=device)  # (batch, 1)
+        seq_lengths = torch.ones(batch_size)  # (batch)
+        encoder_padded_seqs, hidden_states = self.network.forward_encoder(scaffold_seqs, scaffold_seq_lengths)
+        nlls = torch.zeros(batch_size, device=device)
+        not_finished = torch.ones(batch_size, 1, dtype=torch.long, device=device)
+        sequences = []
+        for _ in range(self.max_sequence_length - 1):
+            logits, hidden_states, _ = self.network.forward_decoder(
+                input_vector, seq_lengths, encoder_padded_seqs, hidden_states)  # (batch, 1, voc)
+            # squeeze only the step axis so a batch of one keeps its batch axis
+            probs = logits.softmax(dim=2).squeeze(1)  # (batch, voc)
+            log_probs = logits.log_softmax(dim=2).squeeze(1)  # (batch, voc)
+            # finished sequences are forced to 0 (padding) from here on
+            input_vector = torch.multinomial(probs, 1)*not_finished  # (batch, 1)
+            sequences.append(input_vector)
+            nlls += self._nll_loss(log_probs, input_vector.squeeze(1))
+            not_finished = (input_vector > 1).type(torch.long)  # 0 is padding, 1 is end token
+            if not_finished.sum() == 0:
+                break
+        decoration_smiles = [self.vocabulary.decode_decoration(seq)
+                             for seq in torch.cat(sequences, 1).data.cpu().numpy()]
+        scaffold_smiles = [self.vocabulary.decode_scaffold(seq) for seq in scaffold_seqs.data.cpu().numpy()]
+        return zip(scaffold_smiles, decoration_smiles, nlls.data.cpu().numpy().tolist())
+class Model(nn.Module):
+    """Encoder/decoder transformer with separate encoder and decoder vocabularies.
+
+    NOTE(review): this class uses ``nn``, ``F``, ``math``, ``varables``,
+    ``PositionalEncoder``, ``EncoderBlock`` and ``DecoderBlock``, none of which
+    are imported or defined in the visible part of this file -- confirm where
+    they are expected to come from.
+    """
+    def __init__(self, config):
+        super().__init__()
+        # Varables
+        self.Dim_Attention = config[varables.DIM_ATTENTION]
+        self.Token_Padding_Encoder = config["Token_Padding_Encoder"]
+        self.Token_Padding_Decoder = config["Token_Padding_Decoder"]
+        # Embedding and positional encoding layers
+        self.Embedding_Encoder = nn.Embedding(len(config["vocab_encoder"]), config[varables.DIM_ATTENTION])
+        self.Embedding_Decoder = nn.Embedding(len(config["vocab_decoder"]), config[varables.DIM_ATTENTION])
+        self.pos_emb = PositionalEncoder(config)
+        # Dropout and normalization layers
+        self.Dropout1 = nn.Dropout(config[varables.RATE_DROPOUT])
+        self.Dropout2 = nn.Dropout(config[varables.RATE_DROPOUT])
+        # NOTE(review): the LayerNorms use DIM_EMBEDDING while the embeddings and
+        # the head use DIM_ATTENTION -- presumably the two must be equal; confirm.
+        self.LayerNorm1 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        self.LayerNorm2 = nn.LayerNorm(config[varables.DIM_EMBEDDING])
+        # Transformer layers
+        self.encoder_blocks = nn.ModuleList([EncoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        self.decoder_blocks = nn.ModuleList([DecoderBlock(config) for _ in range(config[varables.NUM_LAYERS])])
+        # Output layer
+        self.head = nn.Linear(config[varables.DIM_ATTENTION], len(config["vocab_decoder"]), bias=False)
+        # Init
+        self.apply(self._init_weights)
+        # Set by the caller before init_scheduler() is used.
+        self.optimizer = None
+        # logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
+    def _init_weights(self, module):
+        """Xavier-uniform initialise every parameter of ``module`` that has more than one dimension.
+
+        Called through ``self.apply``, so it runs once per sub-module and
+        ``module.parameters()`` also yields the parameters of nested modules;
+        parameters of nested modules are therefore initialised more than once.
+        """
+        for p in module.parameters():
+            if p.dim() > 1:
+                nn.init.xavier_uniform_(p)
+        # if isinstance(module, (nn.Linear, nn.Embedding)):
+        #     module.weight.data.normal_(mean=0.0, std=0.02)
+        #     if isinstance(module, nn.Linear) and module.bias is not None:
+        #         module.bias.data.zero_()
+        # elif isinstance(module, nn.LayerNorm):
+        #     module.bias.data.zero_()
+        #     module.weight.data.fill_(1.0)
+    def init_optimizers(self,train_config):
+        """Return an Adam optimizer over all parameters (it is not stored on ``self``)."""
+        optimizer = torch.optim.Adam(self.parameters(), lr=train_config[varables.RATE_LEARNING])
+        return optimizer
+    def init_scheduler(self,train_config):
+        """Return a StepLR scheduler bound to ``self.optimizer``.
+
+        NOTE(review): ``self.optimizer`` is None until the caller assigns it.
+        """
+        scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=train_config[varables.SIZE_STEP], gamma=train_config[varables.GAMMA])
+        return scheduler
+    def get_collate_fn(self, vocab_encoder,vocab_decoder):
+        """Return a collate function that right-pads each batch with the TOKEN_PAD ids.
+
+        The returned function maps a list of ``(encoder_tokens, decoder_tokens)``
+        pairs to ``(x, y, boundary)``; ``boundary`` is always -1.
+        """
+        def collate(results):
+            X_Encoder = [a[0] for a in results]
+            X_Decoder = [a[1] for a in results]
+            boundary = -1
+            # Pad every sequence to the longest one in this batch.
+            max_len_x = max([len(a) for a in X_Encoder])
+            max_len_y = max([len(a) for a in X_Decoder])
+            x = torch.tensor([(a+[vocab_encoder[varables.TOKEN_PAD]]*(max_len_x-len(a))) for a in X_Encoder],dtype=torch.long)
+            y = torch.tensor([(a+[vocab_decoder[varables.TOKEN_PAD]]*(max_len_y-len(a))) for a in X_Decoder],dtype=torch.long)
+            return x,y,boundary
+        return collate
+    def generate_masks(self,X_Encoder, X_Decoder):
+        """Build boolean masks (True = may attend) for encoder, decoder and cross attention.
+
+        Assumes X_Encoder and X_Decoder are 2-D (batch, seq) index tensors -- TODO confirm.
+        """
+        # Generate encoder, decoder, cross masks
+        T = X_Decoder.shape[1]
+        # Two singleton axes are inserted before the last axis of the padding mask.
+        Mask_Encoder = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).unsqueeze(-2)
+        Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).unsqueeze(-2).repeat(1,1,T,1)
+        Mask_Cross   = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).unsqueeze(-2)
+        # Clear everything above the diagonal so a decoder position cannot see later positions.
+        mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T).to(Mask_Decoder.device)
+        Mask_Decoder = Mask_Decoder.masked_fill(mask_tril==0,0)
+        return Mask_Encoder,Mask_Decoder,Mask_Cross
+    def forward(self, X_Encoder, X_Decoder, Y_Decoder_Ref=None,boundary=None):
+        """Encode, decode and optionally compute the padding-ignoring token loss.
+
+        Returns ``(Y_Decoder_Logits, loss)``; ``loss`` is None when
+        ``Y_Decoder_Ref`` is None. ``boundary`` is unused.
+        """
+        Mask_Encoder, Mask_Decoder,Mask_Cross = self.generate_masks(X_Encoder, X_Decoder)
+        # preprocess
+        # Embeddings are scaled by sqrt(Dim_Attention) before the positional term is added.
+        X_Encoder = self.Dropout1(self.Embedding_Encoder(X_Encoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Encoder.size(1)))
+        X_Decoder = self.Dropout2(self.Embedding_Decoder(X_Decoder) * math.sqrt(self.Dim_Attention) + self.pos_emb(X_Decoder.size(1)))
+        #### Now X_Encoder: BatchSize, SequenceLength, DimAttention
+        # Encoder blocks
+        for encoder_block in self.encoder_blocks:
+            X_Encoder = encoder_block(X_Encoder,Mask_Encoder)
+        X_Encoder = self.LayerNorm1(X_Encoder)
+        # Decoder blocks
+        for decoder_block in self.decoder_blocks:
+            X_Decoder = decoder_block(X_Encoder,X_Decoder,Mask_Cross,Mask_Decoder)
+        X_Decoder = self.LayerNorm2(X_Decoder)
+        Y_Decoder_Logits = self.head(X_Decoder)
+        loss = None
+        if Y_Decoder_Ref is not None:
+            # Targets equal to the decoder padding id do not contribute to the loss.
+            loss = F.cross_entropy(Y_Decoder_Logits.view(-1, Y_Decoder_Logits.size(-1)), Y_Decoder_Ref.view(-1),ignore_index=self.Token_Padding_Decoder)
+        return Y_Decoder_Logits, loss
+    # def generate_masks(self,X_Encoder, X_Decoder):
+    #     # Generate encoder, decoder, cross masks
+    #     Mask_Encoder = (X_Encoder != self.Token_Padding_Encoder).unsqueeze(-2).int().cpu()
+    #     Mask_Decoder = (X_Decoder != self.Token_Padding_Decoder).unsqueeze(-2).int().cpu()
+    #     Mask_Cross   = Mask_Decoder.unsqueeze(-1) @ Mask_Encoder.unsqueeze(-2)
+    #     Mask_Encoder = Mask_Encoder.unsqueeze(-1) @ Mask_Encoder.unsqueeze(-2)
+    #     Mask_Decoder = Mask_Decoder.unsqueeze(-1) @ Mask_Decoder.unsqueeze(-2)
+    #     T = X_Decoder.shape[1]
+    #     mask_tril = torch.tril(torch.ones(T, T)).view(1, 1, T, T)
+    #     Mask_Decoder = Mask_Decoder.masked_fill(mask_tril==0,0)
+    #     Mask_Encoder = Mask_Encoder.to(X_Encoder.device)
+    #     Mask_Decoder = Mask_Decoder.to(X_Decoder.device)
+    #     Mask_Cross = Mask_Cross.to(X_Encoder.device)
+    #     return Mask_Encoder,Mask_Decoder,Mask_Cross

SCMG/models/Reinvent_Scaffold_Decorator/sampler.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import random
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+def set_seed(seed):
+    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators with ``seed``."""
+    # Same call order as before: random, numpy, torch, then all CUDA devices.
+    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
+    for seed_fn in seeders:
+        seed_fn(seed)
+def top_k_logits(logits, k):
+    """Keep the ``k`` largest logits of every row and set the rest to ``-inf``.
+    Args:
+        logits: 2-D tensor; the top-k search runs along the last axis.
+        k: number of entries to keep per row. Values larger than the row
+            length keep the whole row instead of raising.
+    Returns:
+        A new tensor; ``logits`` itself is not modified. Entries tied with
+        the k-th largest value are kept.
+    """
+    # Clamp so an oversized k does not make torch.topk fail.
+    k = min(k, logits.size(-1))
+    # Smallest of the k largest values per row, kept as a column for broadcasting.
+    kth_largest = torch.topk(logits, k).values[:, [-1]]
+    return logits.masked_fill(logits < kth_largest, -float('Inf'))
+@torch.no_grad()
+def sample(model, x, steps, temperature=1.0, boundary=None, *, sample=True, top_k=None):
+    """Extend ``x`` by ``steps`` tokens, feeding the model its own predictions.
+    This function used to be defined twice in a row, so the second definition
+    silently replaced the first and its greedy / top-k options were unreachable.
+    The positional signature and defaults of the effective (second) definition
+    are kept; the lost options are offered as keyword-only arguments.
+    Args:
+        model: object providing ``get_block_size()``, ``eval()`` and a call
+            ``model(tokens, boundary=...)`` returning ``(logits, _)``.
+        x: token tensor indexed as (row, position); new tokens are appended
+            along axis 1.
+        steps: number of tokens to append.
+        temperature: divisor applied to the last-position logits.
+        boundary: forwarded unchanged to ``model`` on every step.
+        sample: draw from the softmax distribution when True (default);
+            take the most probable token when False.
+        top_k: if given, restrict each step to the ``top_k`` largest logits.
+    Returns:
+        ``x`` with ``steps`` extra columns.
+    """
+    block_size = model.get_block_size()
+    model.eval()
+    for _step in range(steps):
+        # Feed at most the last block_size positions to the model.
+        x_cond = x if x.size(1) <= block_size else x[:, -block_size:]
+        logits, _ = model(x_cond, boundary=boundary)
+        # Only the prediction for the final position is used.
+        logits = logits[:, -1, :] / temperature
+        if top_k is not None:
+            logits = top_k_logits(logits, top_k)
+        probs = F.softmax(logits, dim=-1)
+        if sample:
+            ix = torch.multinomial(probs, num_samples=1)
+        else:
+            ix = torch.topk(probs, k=1, dim=-1).indices
+        x = torch.cat((x, ix), dim=1)
+    return x
+'L_5*C(=O)NCc1cccc(OC)c1.*c1nsc2ccccc12COc1cccc(CNC(=O)c2cccc(NC(=O)c3nsc4ccccc34)c2)c1'
+# for i in range(1,21):
+def sample_L(i,option='string',num_samples=32,steps=250,device='cuda'):
+    """Sample continuations of a fixed prompt prefixed with ``L_<i>`` and print them.
+    Relies on the module-level names ``vocab``, ``inv``, ``model`` and
+    ``test_valid``, which are not defined in the visible part of this file.
+    Args:
+        i: value appended to the ``'L_'`` prompt prefix.
+        option: currently unused.
+        num_samples: number of identical prompt rows sampled in one batch.
+        steps: number of tokens generated per row.
+        device: device on which the prompt tensor is created.
+    Prints every decoded string first, then one line per string containing
+    1 if ``test_valid`` accepts it and 0 otherwise. Returns None.
+    """
+    string_input = 'L_'+str(i) + '*O=C1NN=Cc2c1cccc2.*O=C(C1CC1)N1CCNCC1'
+    array_input = [vocab[a] for a in ['<bos>'] + list(string_input)]
+    # All rows share the prompt, so its length is the boundary for every row.
+    boundary = [len(array_input)]*num_samples
+    tensor_input = torch.tensor(array_input,device=device).unsqueeze(0).repeat(num_samples,1)
+    tensor_output = sample(model,tensor_input,steps,boundary=boundary)
+    pad_id = vocab['<pad>']
+    strings_output = []
+    for row, start in zip(tensor_output, boundary):
+        # Decode only what was generated after the prompt, skipping padding.
+        tokens = [inv[a] for a in row[start:].tolist() if a != pad_id]
+        # A row may decode to nothing; indexing [-1] unguarded raised IndexError.
+        if tokens and tokens[-1] == '<eos>':
+            tokens = tokens[:-1]
+        string_output = ''.join(tokens)
+        strings_output.append(string_output)
+        print(string_output)
+    for string_output in strings_output:
+        print(1 if test_valid(string_output) else 0)
+['<bos>', 'L', '_', '5', '*', 'C', '(', '=', 'O', ')', 'N', 'C', 'c', '1', 'c', 'c', 'c', 'c', '(', 'O', 'C', ')', 'c', '1', '.', '*', 'c', '1', 'n', 's', 'c', '2', 'c', 'c', 'c', 'c', 'c', '1', '2', 'C', 'O', 'c', '1', 'c', 'c', 'c', 'c', '(', 'C', 'N', 'C', '(', '=', 'O', ')', 'c', '2', 'c', 'c', 'c', 'c', '(', 'N', 'C', '(', '=', 'O', ')', 'c', '3', 'n', 's', 'c', '4', 'c', 'c', 'c', 'c', 'c', '3', '4', ')', 'c', '2', ')', 'c', '1', '<eos>']

SCMG/models/Transformer/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .model import *

SCMG/models/Transformer/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (202 Bytes). View file

SCMG/models/Transformer/__pycache__/model copy 2.cpython-310.pyc ADDED Viewed

Binary file (8.45 kB). View file