D-SCRIPT

Sleeping

App Files Community

samsl committed on Aug 24, 2023

Commit

ff2b104

1 Parent(s): 809fb87

File download and multiple models

Browse files

Files changed (1) hide show

app.py +27 -6

app.py CHANGED Viewed

@@ -5,10 +5,23 @@ from Bio import SeqIO
 from dscript.pretrained import get_pretrained
 from dscript.language_model import lm_embed
 from tqdm.auto import tqdm
-def predict(sequence_file, pairs_file):
-    model = get_pretrained('human_v1')
     seqs = SeqIO.to_dict(SeqIO.parse(sequence_file.name, "fasta"))
     if Path(pairs_file.name).suffix == ".csv":
         pairs = pd.read_csv(pairs_file.name)
@@ -16,9 +29,11 @@ def predict(sequence_file, pairs_file):
         pairs = pd.read_csv(pairs_file.name, sep="\t")
     pairs.columns = ["protein1", "protein2"]
     results = []
     progress = gr.Progress(track_tqdm=True)
     for i, r in tqdm(pairs.iterrows(), total=len(pairs)):
         prot1 = r["protein1"]
         prot2 = r["protein2"]
         seq1 = str(seqs[prot1].seq)
@@ -27,20 +42,26 @@ def predict(sequence_file, pairs_file):
         lm2 = lm_embed(seq2)
         interaction = model.predict(lm1, lm2).item()
         results.append([prot1, prot2, interaction])
-        # progress((i, len(pairs)))
     results = pd.DataFrame(results, columns = ["Protein 1", "Protein 2", "Interaction"])
-    return results
 demo = gr.Interface(
     fn=predict,
     inputs = [
         gr.File(label="Sequences (.fasta)", file_types = [".fasta"]),
         gr.File(label="Pairs (.csv/.tsv)", file_types = [".csv", ".tsv"])
     ],
     outputs = [
-        gr.DataFrame(label='Results', headers=['Protein 1', 'Protein 2', 'Interaction'])
     ]
 )

 from dscript.pretrained import get_pretrained
 from dscript.language_model import lm_embed
 from tqdm.auto import tqdm
+from uuid import uuid4
+model_map = {
+    "D-SCRIPT": "human_v1",
+    "Topsy-Turvy": "human_v2"
+}
+def predict(model, sequence_file, pairs_file):
+    run_id = uuid4()
+    gr.Info("Loading model...")
+    _ = lm_embed("M")
+    model = get_pretrained(model_map[model])
+    gr.Info("Loading files...")
     seqs = SeqIO.to_dict(SeqIO.parse(sequence_file.name, "fasta"))
     if Path(pairs_file.name).suffix == ".csv":
         pairs = pd.read_csv(pairs_file.name)
         pairs = pd.read_csv(pairs_file.name, sep="\t")
     pairs.columns = ["protein1", "protein2"]
+    gr.Info("Predicting...")
     results = []
     progress = gr.Progress(track_tqdm=True)
     for i, r in tqdm(pairs.iterrows(), total=len(pairs)):
+        gr.Info(f"[{i+1}/{len(pairs)}]")
         prot1 = r["protein1"]
         prot2 = r["protein2"]
         seq1 = str(seqs[prot1].seq)
         lm2 = lm_embed(seq2)
         interaction = model.predict(lm1, lm2).item()
         results.append([prot1, prot2, interaction])
+        progress((i, len(pairs)))
     results = pd.DataFrame(results, columns = ["Protein 1", "Protein 2", "Interaction"])
+    file_path = f"/tmp/{run_id}.tsv"
+    with open(file_path, "w") as f:
+        results.to_csv(f, sep="\t", index=False, header = True)
+    return results, file_path
 demo = gr.Interface(
     fn=predict,
     inputs = [
+        gr.Dropdown(label="Model", choices = ["D-SCRIPT", "Topsy-Turvy"], value = "Topsy-Turvy"),
         gr.File(label="Sequences (.fasta)", file_types = [".fasta"]),
         gr.File(label="Pairs (.csv/.tsv)", file_types = [".csv", ".tsv"])
     ],
     outputs = [
+        gr.DataFrame(label='Results', headers=['Protein 1', 'Protein 2', 'Interaction']),
+        gr.File(label="Download results", type="file")
     ]
 )