Update app.py
app.py CHANGED
@@ -42,7 +42,6 @@ class ProteinDataset(Dataset):
         self.tokenizer = tokenizer
         self.proteins = data["Receptor Sequence"].tolist()
         self.peptides = data["Binder"].tolist()
-        print("44 self.peptides:",self.peptides)
         #self.proteins = data["P_Sequence"].tolist() #header defined by Lin Qiao
         #self.peptides = data["p_Sequence"].tolist()
         self.max_length_pm = 500 + 2 + peptide_length #assume the max length of protein is 500
@@ -56,7 +55,6 @@ class ProteinDataset(Dataset):
 
         masked_peptide = '<mask>' * len(peptide_seq)
         complex_seq = protein_seq + masked_peptide
-        print("58 complex_seq:",complex_seq)
         # Tokenize and pad the complex sequence
         complex_input = self.tokenizer(complex_seq, return_tensors="pt", padding="max_length", max_length = self.max_length_pm, truncation=True)
 
@@ -69,7 +67,7 @@ class ProteinDataset(Dataset):
 
         # Set non-masked positions in the labels tensor to -100
         labels = torch.where(input_ids == self.tokenizer.mask_token_id, labels, -100)
-
+
         return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
 
 # fine-tuning function
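The touched lines implement the masked-peptide data preparation used for masked-language-model fine-tuning: the binder is replaced by one <mask> token per residue, the protein-plus-masked-peptide complex is tokenized and padded to a fixed length, and labels are set to -100 everywhere except the masked positions. Below is a minimal self-contained sketch of that flow, assuming an ESM-2 tokenizer from Hugging Face transformers; the checkpoint name, the example sequences, and the construction of labels from the unmasked complex are illustrative assumptions, since the diff elides the surrounding code.

# Sketch of the data preparation this commit touches, NOT the repo's exact
# code: the checkpoint, example sequences, and label construction below are
# illustrative assumptions.
import torch
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D")  # assumed ESM-2 checkpoint

protein_seq = "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ"  # hypothetical receptor sequence
peptide_seq = "ACDEFG"                             # hypothetical binder
max_length_pm = 500 + 2 + len(peptide_seq)         # mirrors self.max_length_pm

# Append one <mask> token per peptide residue after the protein.
masked_peptide = tokenizer.mask_token * len(peptide_seq)
complex_input = tokenizer(protein_seq + masked_peptide, return_tensors="pt",
                          padding="max_length", max_length=max_length_pm,
                          truncation=True)
input_ids = complex_input["input_ids"].squeeze(0)
attention_mask = complex_input["attention_mask"].squeeze(0)

# Assumed label construction: tokenize the unmasked complex. ESM tokenizers
# map one residue (and one <mask>) to exactly one token, so positions align.
labels = tokenizer(protein_seq + peptide_seq, return_tensors="pt",
                   padding="max_length", max_length=max_length_pm,
                   truncation=True)["input_ids"].squeeze(0)

# Keep supervision only at masked positions; -100 is ignored by the MLM loss.
labels = torch.where(input_ids == tokenizer.mask_token_id, labels, -100)

batch = {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

Because attention_mask is 1 for the mask tokens as well, the model attends to the full protein context while predicting each peptide position, and CrossEntropyLoss's default ignore_index of -100 keeps everything except the masked binder out of the fine-tuning loss.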