"""Smoke test for the tokenizer saved in the current directory.

Loads the tokenizer from ``./`` and prints the tokens it produces for one
example SMILES string.  The model-generation experiments below are kept
commented out for reference; re-enabling them requires loading the model
(and RDKit for the validity check) first.
"""

# from rdkit import Chem
import tensorflow as tf
import torch as pt
# from t5_tokenizer_model import SentencePieceUnigramTokenizer
from pretokenizer import atomwise_tokenizer
from transformers import AutoTokenizer, T5Tokenizer, T5ForConditionalGeneration, T5Config
from tokenizers import Tokenizer
import numpy as np

# Input used for the tokenization check below.
EXAMPLE_SMILES = "O=[N+]([O-])c1ccc(Cl)cc1"

# NOTE: torch is imported as `pt`, so the device line has to use that alias.
# device = pt.device("cuda" if pt.cuda.is_available() else "cpu")
# model = T5ForConditionalGeneration.from_pretrained(pretrained_model_name_or_path="./", from_flax=True)
tokenizer = AutoTokenizer.from_pretrained("./")
# tokenizer = Tokenizer.from_file("/home/zoez/chemT5")
# model = model.to(device)
# print(tokenizer.encode("O=[N+]([O-])c1ccc(Cl)cc1").tokens)

# --- Beam search (disabled) -------------------------------------------------
# # encode context the generation is conditioned on
# input_ids1 = tokenizer.encode("1", return_tensors='pt')
# print(input_ids1)
# # activate beam search and early_stopping
# beam_output1 = model.generate(
#     input_ids1,
#     max_length=50,
#     num_beams=5,
#     early_stopping=True
# )

# Encode the example to token ids, then map the ids back to their token
# strings so the segmentation can be inspected by eye.
encoding = tokenizer.encode(EXAMPLE_SMILES)
print(tokenizer.convert_ids_to_tokens(encoding))
# print(tokenizer.encode("O=[N+]([O-])c1ccc(Cl)cc1").tokens)

# --- Sampling (disabled) ----------------------------------------------------
# # set seed to reproduce results. Feel free to change the seed though to get different results
# # NOTE(review): this seeds TensorFlow only; the generate calls here use
# # return_tensors='pt', so presumably pt.manual_seed(0) is what is needed - confirm.
# tf.random.set_seed(0)
# # use temperature to decrease the sensitivity to low probability candidates
# sample_output = model.generate(
#     input_ids1,
#     do_sample=True,
#     max_length=50,
#     top_k=0,
#     temperature=0.7
# )
# print("Output:\n" + 100 * '-')
# print(tokenizer.decode(sample_output[0], skip_special_tokens=True))
# print("Output: 1\n" + 100 * '-')
# print(tokenizer.decode(beam_output1[0], skip_special_tokens=True))
# decoding = tokenizer.decode(beam_output1[0], skip_special_tokens=True)
# # NOTE(review): `decoding` is already decoded text, not ids; pass
# # beam_output1[0] to convert_ids_to_tokens instead.
# print(tokenizer.convert_ids_to_tokens(decoding))

# --- Multiple beams + validity check (disabled) -----------------------------
# # encode context the generation is conditioned on
# input_ids2 = tokenizer.encode(": ", return_tensors='pt')
# # activate beam search and early_stopping
# beam_output2 = model.generate(
#     input_ids2,
#     max_length=50,
#     num_beams=9,
#     no_repeat_ngram_size=2,
#     num_return_sequences=9,
#     early_stopping=True
# )
# print(tokenizer.encode("O=[N+]([O-])c1ccc(Cl)cc1"))
# print("Output: 2\n" + 100 * '-')
# print(tokenizer.decode(beam_output2[0], skip_special_tokens=True))
# # start = latent_to_string(latent0)
# # destination = latent_to_string(latent1)
# mols1 = []
# step = np.linspace(0, 1, 100)
# invalid = 0
# steps = []
# step_invalid = []
# # Generate molecules using interpolation
# for i, beam in enumerate(beam_output2):
#     # target_latent = (1.0-step[i])*latent0 + step[i]*latent1
#     # string = latent_to_string(target_latent)
#     smiles = tokenizer.decode(beam, skip_special_tokens=True)  # when using SMILES
#     print(tokenizer.decode(beam, skip_special_tokens=True))
#     # smiles = sel.decoder(string)  # when using SELFIES
#     mol = Chem.MolFromSmiles(smiles)
#     if mol:
#         if smiles not in mols1:
#             mols1.append(smiles)
#             steps.append(i)
#     else:
#         invalid = invalid + 1
#         step_invalid.append(i)
# # print("starting mol:", start)
# # print('destination mol:', destination)
# print("generated mols:", mols1)