JadenLong committed · commit de3ac48 (verified) · parent: 1cc6401

Update README.md

Files changed (1): README.md (+3 −3)
README.md CHANGED
@@ -29,7 +29,7 @@ MutBERT is a transformer-based genome foundation model trained only on Human gen
 ```python
 from transformers import AutoTokenizer, AutoModel
 
-model_name = "JadenLong/MutBERT"
+model_name = "JadenLong/MutBERT-Human-Ref"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
 ```
@@ -52,7 +52,7 @@ dna = "ATCGGGGCCCATTA"
 inputs = tokenizer(dna, return_tensors='pt')["input_ids"]
 
 mut_inputs = F.one_hot(inputs, num_classes=len(tokenizer)).float().to("cpu") # len(tokenizer) is vocab size
-last_hidden_state = model(inputs).last_hidden_state # [1, sequence_length, 768]
+last_hidden_state = model(mut_inputs).last_hidden_state # [1, sequence_length, 768]
 # or: last_hidden_state = model(mut_inputs)[0] # [1, sequence_length, 768]
 
 # embedding with mean pooling
@@ -60,7 +60,7 @@ embedding_mean = torch.mean(last_hidden_state[0], dim=0)
 print(embedding_mean.shape) # expect to be 768
 
 # embedding with max pooling
-embedding_max = torch.max(hidden_states[0], dim=0)[0]
+embedding_max = torch.max(last_hidden_state[0], dim=0)[0]
 print(embedding_max.shape) # expect to be 768
 ```
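For reference, the commit's three fixes (the checkpoint name, passing the one-hot `mut_inputs` tensor to the forward pass instead of the raw token IDs, and the undefined `hidden_states` variable in the max-pooling line) combine into the snippet below. This is a sketch of the post-commit usage: the `torch` and `F` imports are assumptions, since the hunks above don't show where the README defines them.

```python
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel

model_name = "JadenLong/MutBERT-Human-Ref"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

dna = "ATCGGGGCCCATTA"
inputs = tokenizer(dna, return_tensors='pt')["input_ids"]  # token IDs, shape [1, sequence_length]

# Per the README, MutBERT's custom forward (trust_remote_code) takes
# probability-style input: a float one-hot tensor over the vocabulary.
mut_inputs = F.one_hot(inputs, num_classes=len(tokenizer)).float().to("cpu")  # len(tokenizer) is vocab size
last_hidden_state = model(mut_inputs).last_hidden_state  # [1, sequence_length, 768]

# embedding with mean pooling
embedding_mean = torch.mean(last_hidden_state[0], dim=0)
print(embedding_mean.shape)  # torch.Size([768])

# embedding with max pooling (torch.max returns (values, indices); keep the values)
embedding_max = torch.max(last_hidden_state[0], dim=0)[0]
print(embedding_max.shape)  # torch.Size([768])
```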