wangjin2000 committed
Commit 2d9177f · verified · 1 Parent(s): df08bd2

Update app.py

Files changed (1): app.py (+1, -3)
app.py CHANGED
@@ -42,7 +42,6 @@ class ProteinDataset(Dataset):
         self.tokenizer = tokenizer
         self.proteins = data["Receptor Sequence"].tolist()
         self.peptides = data["Binder"].tolist()
-        print("44 self.peptides:",self.peptides)
         #self.proteins = data["P_Sequence"].tolist() #header defined by Lin Qiao
         #self.peptides = data["p_Sequence"].tolist()
         self.max_length_pm = 500 + 2 + peptide_length #assume the max length of protein is 500
@@ -56,7 +55,6 @@ class ProteinDataset(Dataset):
 
         masked_peptide = '<mask>' * len(peptide_seq)
         complex_seq = protein_seq + masked_peptide
-        print("58 complex_seq:",complex_seq)
         # Tokenize and pad the complex sequence
         complex_input = self.tokenizer(complex_seq, return_tensors="pt", padding="max_length", max_length = self.max_length_pm, truncation=True)
 
@@ -69,7 +67,7 @@ class ProteinDataset(Dataset):
 
         # Set non-masked positions in the labels tensor to -100
         labels = torch.where(input_ids == self.tokenizer.mask_token_id, labels, -100)
-        print("71 idx,input_ids:", idx, input_ids)
+
         return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
 
     # fine-tuning function
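
For reference, here is a minimal, self-contained sketch of the ProteinDataset logic this diff touches, with the debug prints removed: the peptide is replaced by mask tokens, the protein-peptide complex is tokenized to a fixed length, and labels are set to -100 everywhere except the masked positions so the loss covers only the peptide. The __init__ signature, the construction of labels from the unmasked complex, and the ESM-style Hugging Face tokenizer are assumptions filled in from context, not confirmed by this commit.

# Sketch of the ProteinDataset shown in the diff above, after this commit's cleanup.
# Assumptions (not confirmed by the diff): the input is a pandas DataFrame, the
# tokenizer is an ESM-style Hugging Face tokenizer whose mask token is "<mask>"
# and which maps one residue to one token, and labels come from tokenizing the
# unmasked protein+peptide complex so they align position-by-position.
import torch
from torch.utils.data import Dataset

class ProteinDataset(Dataset):
    def __init__(self, data, tokenizer, peptide_length):
        self.tokenizer = tokenizer
        self.proteins = data["Receptor Sequence"].tolist()
        self.peptides = data["Binder"].tolist()
        # 500 residues + 2 special tokens + the peptide to be generated
        self.max_length_pm = 500 + 2 + peptide_length

    def __len__(self):
        return len(self.proteins)

    def __getitem__(self, idx):
        protein_seq = self.proteins[idx]
        peptide_seq = self.peptides[idx]

        # Mask every peptide residue and append the masked stretch to the receptor.
        masked_peptide = '<mask>' * len(peptide_seq)
        complex_seq = protein_seq + masked_peptide

        # Tokenize and pad the complex sequence to a fixed length.
        complex_input = self.tokenizer(complex_seq, return_tensors="pt",
                                       padding="max_length",
                                       max_length=self.max_length_pm,
                                       truncation=True)
        input_ids = complex_input["input_ids"].squeeze(0)
        attention_mask = complex_input["attention_mask"].squeeze(0)

        # Labels: the true peptide tokens at the masked positions, -100 elsewhere.
        label_input = self.tokenizer(protein_seq + peptide_seq,
                                     return_tensors="pt",
                                     padding="max_length",
                                     max_length=self.max_length_pm,
                                     truncation=True)
        labels = label_input["input_ids"].squeeze(0)
        # Set non-masked positions in the labels tensor to -100
        labels = torch.where(input_ids == self.tokenizer.mask_token_id, labels, -100)

        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

The -100 value is the default ignore_index of PyTorch's cross-entropy loss, which is why every receptor and padding position is set to it: only the masked peptide tokens contribute to the fine-tuning loss.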
 