Update app.py
app.py CHANGED
@@ -156,8 +156,10 @@ def generate_peptide_for_single_sequence(model, tokenizer, protein_seq, peptide_
     num_binders = int(num_binders)

     binders_with_ppl = []
-
+    n = 0
     for _ in range(num_binders):
+        n += 1
+        print("n in num_binders:", n)
         # Generate binder
         masked_peptide = '<mask>' * peptide_length
         input_sequence = protein_seq + masked_peptide
@@ -167,18 +169,21 @@ def generate_peptide_for_single_sequence(model, tokenizer, protein_seq, peptide_
         logits = model(**inputs).logits
         mask_token_indices = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
         logits_at_masks = logits[0, mask_token_indices]
-
+        print("mask_token_indices 170:", mask_token_indices)
+
         # Apply top-k sampling
         top_k_logits, top_k_indices = logits_at_masks.topk(top_k, dim=-1)
         probabilities = torch.nn.functional.softmax(top_k_logits, dim=-1)
         predicted_indices = Categorical(probabilities).sample()
         predicted_token_ids = top_k_indices.gather(-1, predicted_indices.unsqueeze(-1)).squeeze(-1)
-
+        print("predicted_token_ids 177:", predicted_token_ids)
+
         generated_binder = tokenizer.decode(predicted_token_ids, skip_special_tokens=True).replace(' ', '')

         # Compute PPL for the generated binder
         ppl_value = compute_pseudo_perplexity(model, tokenizer, protein_seq, generated_binder)
-
+        print("ppl_value:", ppl_value)
+
         # Add the generated binder and its PPL to the results list
         binders_with_ppl.append([generated_binder, ppl_value])
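
For reference, the sampling step this diff instruments can be exercised on its own. Below is a minimal sketch with random logits standing in for the model output; sample_top_k is a hypothetical helper name, not a function in app.py:

import torch
from torch.distributions import Categorical

def sample_top_k(logits_at_masks, top_k):
    # logits_at_masks: (num_masks, vocab_size) logits at the <mask> positions.
    # Keep only the k highest-scoring tokens per masked position.
    top_k_logits, top_k_indices = logits_at_masks.topk(top_k, dim=-1)
    # Renormalize over the kept tokens and sample one per position.
    probabilities = torch.nn.functional.softmax(top_k_logits, dim=-1)
    predicted_indices = Categorical(probabilities).sample()
    # Map the sampled column indices back to vocabulary token ids.
    return top_k_indices.gather(-1, predicted_indices.unsqueeze(-1)).squeeze(-1)

# Toy check: 5 masked positions over a 33-token vocabulary (ESM-sized).
print(sample_top_k(torch.randn(5, 33), top_k=3))

Printing the result, as the added debug line does for predicted_token_ids, shows one sampled token id per masked position.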
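
compute_pseudo_perplexity is called in this hunk but its body is not part of the diff. A common formulation of masked-LM pseudo-perplexity masks each binder residue in turn and exponentiates the mean negative log-likelihood; the sketch below assumes an ESM-style tokenizer (one token per residue, with <cls> and <eos> specials) and may differ from the actual helper in app.py:

import torch

def pseudo_perplexity_sketch(model, tokenizer, protein_seq, binder):
    # Sketch only: assumes ESM-style tokenization; the real helper may differ.
    inputs = tokenizer(protein_seq + binder, return_tensors="pt")
    input_ids = inputs["input_ids"]
    # Binder tokens are the last len(binder) positions before the trailing <eos>.
    seq_len = input_ids.shape[1]
    binder_positions = range(seq_len - 1 - len(binder), seq_len - 1)
    log_probs = []
    with torch.no_grad():
        for pos in binder_positions:
            # Mask one binder residue and score the true token at that position.
            masked = input_ids.clone()
            true_id = masked[0, pos].item()
            masked[0, pos] = tokenizer.mask_token_id
            logits = model(input_ids=masked, attention_mask=inputs["attention_mask"]).logits
            log_probs.append(torch.log_softmax(logits[0, pos], dim=-1)[true_id])
    # Pseudo-perplexity: exp of the mean negative log-likelihood over the binder.
    return torch.exp(-torch.stack(log_probs).mean()).item()

Lower values mean the model finds the generated binder more plausible in the context of the target protein, which is why the diff logs ppl_value alongside each binder.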