JadenLong committed · commit de3ac48 (verified) · parent: 1cc6401

Update README.md

Files changed (1): README.md (+3 −3)
README.md CHANGED
@@ -29,7 +29,7 @@ MutBERT is a transformer-based genome foundation model trained only on Human gen
 ```python
 from transformers import AutoTokenizer, AutoModel
 
-model_name = "JadenLong/MutBERT"
+model_name = "JadenLong/MutBERT-Human-Ref"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
 ```
@@ -52,7 +52,7 @@ dna = "ATCGGGGCCCATTA"
 inputs = tokenizer(dna, return_tensors='pt')["input_ids"]
 
 mut_inputs = F.one_hot(inputs, num_classes=len(tokenizer)).float().to("cpu") # len(tokenizer) is vocab size
-last_hidden_state = model(inputs).last_hidden_state # [1, sequence_length, 768]
+last_hidden_state = model(mut_inputs).last_hidden_state # [1, sequence_length, 768]
 # or: last_hidden_state = model(mut_inputs)[0] # [1, sequence_length, 768]
 
 # embedding with mean pooling
@@ -60,7 +60,7 @@ embedding_mean = torch.mean(last_hidden_state[0], dim=0)
 print(embedding_mean.shape) # expect to be 768
 
 # embedding with max pooling
-embedding_max = torch.max(hidden_states[0], dim=0)[0]
+embedding_max = torch.max(last_hidden_state[0], dim=0)[0]
 print(embedding_max.shape) # expect to be 768
 ```
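For reference, the commit's three fixes (the checkpoint name, passing the one-hot `mut_inputs` tensor to the forward pass instead of the raw token IDs, and the undefined `hidden_states` variable in the max-pooling line) combine into the snippet below. This is a sketch of the post-commit usage: the `torch` and `F` imports are assumptions, since the hunks above don't show where the README defines them.

```python
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel

model_name = "JadenLong/MutBERT-Human-Ref"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

dna = "ATCGGGGCCCATTA"
inputs = tokenizer(dna, return_tensors='pt')["input_ids"]  # token IDs, shape [1, sequence_length]

# Per the README, MutBERT's custom forward (trust_remote_code) takes
# probability-style input: a float one-hot tensor over the vocabulary.
mut_inputs = F.one_hot(inputs, num_classes=len(tokenizer)).float().to("cpu")  # len(tokenizer) is vocab size
last_hidden_state = model(mut_inputs).last_hidden_state  # [1, sequence_length, 768]

# embedding with mean pooling
embedding_mean = torch.mean(last_hidden_state[0], dim=0)
print(embedding_mean.shape)  # torch.Size([768])

# embedding with max pooling (torch.max returns (values, indices); keep the values)
embedding_max = torch.max(last_hidden_state[0], dim=0)[0]
print(embedding_max.shape)  # torch.Size([768])
```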