| A small snippet of code is given here in order to retrieve embeddings and gene expression predictions given a DNA, RNA and protein sequence. | |
| ```python | |
| from transformers import AutoTokenizer, AutoModelForMaskedLM | |
| import numpy as np | |
| import torch | |
| # Import the tokenizer and the model | |
| tokenizer = AutoTokenizer.from_pretrained("isoformer-anonymous/Isoformer", trust_remote_code=True) | |
| model = AutoModelForMaskedLM.from_pretrained("isoformer-anonymous/Isoformer",trust_remote_code=True) | |
| protein_sequences = ["RSRSRSRSRSRSRSRSRSRSRL" * 9] | |
| rna_sequences = ["ATTCCGGTTTTCA" * 9] | |
| sequence_length = 196_608 | |
| rng = np.random.default_rng(seed=0) | |
| dna_sequences = ["".join(rng.choice(list("ATCGN"), size=(sequence_length,)))] | |
| torch_tokens = tokenizer( | |
| dna_input=dna_sequences, rna_input=rna_sequences, protein_input=protein_sequences | |
| ) | |
| dna_torch_tokens = torch.tensor(torch_tokens[0]["input_ids"]) | |
| rna_torch_tokens = torch.tensor(torch_tokens[1]["input_ids"]) | |
| protein_torch_tokens = torch.tensor(torch_tokens[2]["input_ids"]) | |
| torch_output = model.forward( | |
| tensor_dna=dna_torch_tokens, | |
| tensor_rna=rna_torch_tokens, | |
| tensor_protein=protein_torch_tokens, | |
| attention_mask_rna=rna_torch_tokens != 1, | |
| attention_mask_protein=protein_torch_tokens != 1, | |
| ) | |
| print(f"Gene expression predictions: {torch_output['gene_expression_predictions']}") | |
| print(f"Final DNA embedding: {torch_output['final_dna_embeddings']}") | |
| ``` |