JadenLong committed (verified)
Commit 969e58b · 1 Parent(s): 9477432

Update README.md

Files changed (1):
  1. README.md +6 -4
README.md CHANGED
@@ -29,7 +29,7 @@ MutBERT is a transformer-based genome foundation model trained only on Human gen
 ```python
 from transformers import AutoTokenizer, AutoModel
 
-model_name = "JadenLong/MutBERT"
+model_name = "JadenLong/MutBERT-Multi"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
 ```
@@ -52,7 +52,7 @@ dna = "ATCGGGGCCCATTA"
 inputs = tokenizer(dna, return_tensors='pt')["input_ids"]
 
 mut_inputs = F.one_hot(inputs, num_classes=len(tokenizer)).float().to("cpu") # len(tokenizer) is vocab size
-last_hidden_state = model(inputs).last_hidden_state # [1, sequence_length, 768]
+last_hidden_state = model(mut_inputs).last_hidden_state # [1, sequence_length, 768]
 # or: last_hidden_state = model(mut_inputs)[0] # [1, sequence_length, 768]
 
 # embedding with mean pooling
@@ -60,8 +60,9 @@ embedding_mean = torch.mean(last_hidden_state[0], dim=0)
 print(embedding_mean.shape) # expect to be 768
 
 # embedding with max pooling
-embedding_max = torch.max(hidden_states[0], dim=0)[0]
+embedding_max = torch.max(last_hidden_state[0], dim=0)[0]
 print(embedding_max.shape) # expect to be 768
+
 ```
 
 ### Using as a Classifier
@@ -69,7 +70,7 @@ print(embedding_max.shape) # expect to be 768
 ```python
 from transformers import AutoModelForSequenceClassification
 
-model_name = "JadenLong/MutBERT"
+model_name = "JadenLong/MutBERT-Multi"
 model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True, num_labels=2)
 ```
 
@@ -80,6 +81,7 @@ Allowed types for RoPE scaling are: `linear` and `dynamic`. To extend the model'
 If you want to scale your model context by 2x:
 
 ```python
+model_name = "JadenLong/MutBERT-Multi"
 model = AutoModel.from_pretrained(model_name,
                                   trust_remote_code=True,
                                   rope_scaling={'type': 'dynamic','factor': 2.0}
 
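Taken together, the two code fixes in this commit (the model is fed the one-hot `mut_inputs` rather than the raw token ids, and max pooling reads `last_hidden_state` rather than the undefined `hidden_states`) give the embedding snippet below. A minimal end-to-end sketch, assuming the `torch` and `torch.nn.functional` imports that the README presumably declares outside these hunks:

```python
# A minimal end-to-end sketch of the corrected embedding example.
# Assumption: torch and torch.nn.functional are imported earlier in the
# README; they are not visible in this diff's context lines.
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel

model_name = "JadenLong/MutBERT-Multi"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

dna = "ATCGGGGCCCATTA"
inputs = tokenizer(dna, return_tensors='pt')["input_ids"]

# One-hot encode the token ids; len(tokenizer) is the vocab size.
mut_inputs = F.one_hot(inputs, num_classes=len(tokenizer)).float().to("cpu")

# The commit's first fix: the model consumes mut_inputs, not the raw ids.
last_hidden_state = model(mut_inputs).last_hidden_state  # [1, sequence_length, 768]

# Mean pooling over the sequence dimension.
embedding_mean = torch.mean(last_hidden_state[0], dim=0)
print(embedding_mean.shape)  # expect to be 768

# The commit's second fix: pool over last_hidden_state (hidden_states was
# undefined). torch.max returns (values, indices), hence the trailing [0].
embedding_max = torch.max(last_hidden_state[0], dim=0)[0]
print(embedding_max.shape)  # expect to be 768
```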
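For the classifier entry point, the diff only changes the checkpoint name. A hypothetical inference sketch follows; whether the classification head accepts the same one-hot inputs as the base model is an assumption on my part, not something this diff states:

```python
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "JadenLong/MutBERT-Multi"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, trust_remote_code=True, num_labels=2
)

inputs = tokenizer("ATCGGGGCCCATTA", return_tensors='pt')["input_ids"]
# Assumption: the classifier takes one-hot inputs like the base model.
mut_inputs = F.one_hot(inputs, num_classes=len(tokenizer)).float()

logits = model(mut_inputs).logits  # [1, 2] for num_labels=2
pred = logits.argmax(dim=-1)       # predicted class index
```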
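The last hunk is cut off at the hunk boundary, so the `from_pretrained` call appears without its closing parenthesis in the diff. Completed, the call would presumably read as below; everything except the final `)` comes straight from the added and context lines:

```python
from transformers import AutoModel

model_name = "JadenLong/MutBERT-Multi"

# Dynamic RoPE scaling with factor 2.0, i.e. a 2x context extension.
# The closing parenthesis is an assumption: the diff hunk ends mid-call.
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
    rope_scaling={'type': 'dynamic', 'factor': 2.0},
)
```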