Spaces:

ChatterjeeLab
/

zero_shot_mutation_prediction

Running

App Files Files Community

Kseniia-Kholina committed on Aug 6, 2024

Commit

e7999a2

verified ·

1 Parent(s): 0224cdb

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -4

app.py CHANGED Viewed

@@ -26,8 +26,51 @@ model = AutoModelForMaskedLM.from_pretrained(model_name, trust_remote_code=True)
 model.to(device)
 model.eval()
 def process_sequence(sequence, domain_bounds, n):
     start_index = int(domain_bounds['start'][0]) - 1
     end_index = int(domain_bounds['end'][0])
@@ -45,7 +88,6 @@ def process_sequence(sequence, domain_bounds, n):
               mask_token_logits = logits[0, mask_token_index, :]
               # Define amino acid tokens
-              AAs_tokens = ['L', 'A', 'G', 'V', 'S', 'E', 'R', 'T', 'I', 'D', 'P', 'K', 'Q', 'N', 'F', 'Y', 'M', 'H', 'W', 'C']
               all_tokens_logits = mask_token_logits.squeeze(0)
               top_tokens_indices = torch.argsort(all_tokens_logits, dim=0, descending=True)
               top_tokens_logits = all_tokens_logits[top_tokens_indices]
@@ -91,7 +133,7 @@ def process_sequence(sequence, domain_bounds, n):
     # Save the figure to a BytesIO object
     buf = BytesIO()
-    plt.savefig(buf, format='png', dpi=(300, 300))
     buf.seek(0)
     plt.close()
@@ -114,7 +156,7 @@ def process_sequence(sequence, domain_bounds, n):
         'Position': positions
     })
     df.to_csv("predicted_tokens.csv", index=False)
-    img.save("heatmap.png", dpi = 300)
     zip_path = "outputs.zip"
     with zipfile.ZipFile(zip_path, 'w') as zipf:
         zipf.write("predicted_tokens.csv")
@@ -143,4 +185,5 @@ demo = gr.Interface(
     ],
 )
 if __name__ == "__main__":
-    demo.launch()

 model.to(device)
 model.eval()
+@contextmanager
+def suppress_output():
+    with open(os.devnull, 'w') as devnull:
+        old_stdout = sys.stdout
+        sys.stdout = devnull
+        try:
+            yield
+        finally:
+            sys.stdout = old_stdout
 def process_sequence(sequence, domain_bounds, n):
+    AAs_tokens = ['L', 'A', 'G', 'V', 'S', 'E', 'R', 'T', 'I', 'D', 'P', 'K', 'Q', 'N', 'F', 'Y', 'M', 'H', 'W', 'C']
+    # checking sequence inputs
+    if not sequence.strip():
+      raise gr.Error("Error: The sequence input is empty. Please enter a valid protein sequence.")
+      return None, None, None
+    if any(char not in AAs_tokens for char in sequence):
+      raise gr.Error("Error: The sequence input contains non-amino acid characters. Please enter a valid protein sequence.")
+      return None, None, None
+    # checking domain bounds inputs
+    try:
+      start = int(domain_bounds['start'][0])
+      end = int(domain_bounds['end'][0])
+    except ValueError:
+      raise gr.Error("Error: Start and end indices must be integers.")
+      return None, None, None
+    if start >= end:
+      raise gr.Error("Start index must be smaller than end index.")
+      return None, None, None
+    if start == 0 and end != 0:
+      raise gr.Error("Indexing starts at 1. Please enter valid domain bounds.")
+      return None, None, None
+    if start == 0 or end == 0:
+      raise gr.Error("Domain bounds cannot be zero. Please enter valid domain bounds.")
+      return None, None, None
+    if start > len(sequence) or end > len(sequence):
+      raise gr.Error("Domain bounds exceed sequence length.")
+      return None, None, None
+    # checking n inputs
+    if n == None:
+      raise gr.Error("Choose Top N Tokens from the dropdown menu.")
+      return None, None, None
     start_index = int(domain_bounds['start'][0]) - 1
     end_index = int(domain_bounds['end'][0])
               mask_token_logits = logits[0, mask_token_index, :]
               # Define amino acid tokens
               all_tokens_logits = mask_token_logits.squeeze(0)
               top_tokens_indices = torch.argsort(all_tokens_logits, dim=0, descending=True)
               top_tokens_logits = all_tokens_logits[top_tokens_indices]
     # Save the figure to a BytesIO object
     buf = BytesIO()
+    plt.savefig(buf, format='png', dpi = 300)
     buf.seek(0)
     plt.close()
         'Position': positions
     })
     df.to_csv("predicted_tokens.csv", index=False)
+    img.save("heatmap.png", dpi=(300, 300))
     zip_path = "outputs.zip"
     with zipfile.ZipFile(zip_path, 'w') as zipf:
         zipf.write("predicted_tokens.csv")
     ],
 )
 if __name__ == "__main__":
+    with suppress_output():
+      demo.launch()