wangjin2000 committed (verified)
Commit aa0a8ad · 1 parent: a9f4b7f

Update app.py

Files changed (1):
  1. app.py (+9 -4)
app.py CHANGED
@@ -156,8 +156,10 @@ def generate_peptide_for_single_sequence(model, tokenizer, protein_seq, peptide_
     num_binders = int(num_binders)
 
     binders_with_ppl = []
-
+    n = 0
     for _ in range(num_binders):
+        n += 1
+        print("n in num_binders:", n)
         # Generate binder
         masked_peptide = '<mask>' * peptide_length
         input_sequence = protein_seq + masked_peptide
@@ -167,18 +169,21 @@ def generate_peptide_for_single_sequence(model, tokenizer, protein_seq, peptide_
         logits = model(**inputs).logits
         mask_token_indices = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
         logits_at_masks = logits[0, mask_token_indices]
-
+        print("mask_token_indices 170:",mask_token_indices)
+
         # Apply top-k sampling
         top_k_logits, top_k_indices = logits_at_masks.topk(top_k, dim=-1)
         probabilities = torch.nn.functional.softmax(top_k_logits, dim=-1)
         predicted_indices = Categorical(probabilities).sample()
         predicted_token_ids = top_k_indices.gather(-1, predicted_indices.unsqueeze(-1)).squeeze(-1)
-
+        print("predicted_token_ids 177:",predicted_token_ids)
+
         generated_binder = tokenizer.decode(predicted_token_ids, skip_special_tokens=True).replace(' ', '')
 
         # Compute PPL for the generated binder
         ppl_value = compute_pseudo_perplexity(model, tokenizer, protein_seq, generated_binder)
-
+        print("ppl_value:", ppl_value)
+
         # Add the generated binder and its PPL to the results list
         binders_with_ppl.append([generated_binder, ppl_value])
 
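For context, here is a minimal, self-contained sketch of the loop this commit instruments. It is an illustration under stated assumptions rather than the app's exact code: the ESM-2 checkpoint name and the example protein sequence are placeholders, and app.py's compute_pseudo_perplexity helper (called right after decoding in the real loop) is left out because it is defined elsewhere in the file.

import torch
from torch.distributions import Categorical
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Placeholder checkpoint; app.py loads its own model/tokenizer elsewhere.
model_name = "facebook/esm2_t6_8M_UR50D"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)
model.eval()

def generate_binders(protein_seq, peptide_length=12, top_k=3, num_binders=4):
    binders = []
    for n in range(1, num_binders + 1):
        print("n in num_binders:", n)  # the counter added by this commit
        # Append a fully masked peptide to the target protein and tokenize.
        masked_peptide = tokenizer.mask_token * peptide_length
        inputs = tokenizer(protein_seq + masked_peptide, return_tensors="pt")
        with torch.no_grad():
            logits = model(**inputs).logits
        # Slice out the logits at the masked positions only.
        mask_token_indices = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
        logits_at_masks = logits[0, mask_token_indices]
        # Top-k sampling: keep the k best logits per position, renormalize,
        # and draw one token id per masked position.
        top_k_logits, top_k_indices = logits_at_masks.topk(top_k, dim=-1)
        probabilities = torch.nn.functional.softmax(top_k_logits, dim=-1)
        predicted_indices = Categorical(probabilities).sample()
        predicted_token_ids = top_k_indices.gather(-1, predicted_indices.unsqueeze(-1)).squeeze(-1)
        binder = tokenizer.decode(predicted_token_ids, skip_special_tokens=True).replace(" ", "")
        binders.append(binder)
    return binders

print(generate_binders("MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ"))

Restricting each draw to the top_k logits per masked position keeps samples plausible under the model while still varying across the num_binders iterations; the prints added in this commit simply expose the loop counter, the mask positions, the sampled token ids, and each binder's pseudo-perplexity while debugging.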