protgpt3 committed on
Commit
bf4fca6
·
verified ·
1 Parent(s): 376da1e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -90,7 +90,7 @@ def process_style(seq: str, gap: bool):
90
  return re.sub(r"[X]", "", seq.replace("-", "").upper())
91
 
92
  def build_prompt(
93
- sequences: List[str],
94
  gap: bool = False,
95
  ) -> str:
96
  """Build prompt for ProtGPT3-MSA"""
@@ -118,7 +118,7 @@ def build_prompt(
118
  model_id = "protgpt3/ProtGPT3-MSA" # Replace with the final checkpoint name
119
 
120
  # Load tokenizer for generation
121
- tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,add_bos_token=True, add_eos_token=False)
122
 
123
  model = AutoModelForCausalLM.from_pretrained(
124
  model_id,
@@ -178,7 +178,7 @@ aligned_homologs = [
178
  "MKTAYIAKQRQINNSFVKSHFSRQNILD",
179
  ]
180
 
181
- prompt = build_prompt(sequences=homologs, gap=True)
182
 
183
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
184
 
 
90
  return re.sub(r"[X]", "", seq.replace("-", "").upper())
91
 
92
  def build_prompt(
93
+ sequences: list,
94
  gap: bool = False,
95
  ) -> str:
96
  """Build prompt for ProtGPT3-MSA"""
 
118
  model_id = "protgpt3/ProtGPT3-MSA" # Replace with the final checkpoint name
119
 
120
  # Load tokenizer for generation
121
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,add_bos_token=False, add_eos_token=False) # BOS token manually added in build_prompt
122
 
123
  model = AutoModelForCausalLM.from_pretrained(
124
  model_id,
 
178
  "MKTAYIAKQRQINNSFVKSHFSRQNILD",
179
  ]
180
 
181
+ prompt = build_prompt(sequences=aligned_homologs, gap=True)
182
 
183
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
184