Spaces:

thaidaev
/

zsp

Running

App Files

mgtotaro committed on Nov 21, 2025

Commit

31be27c

1 Parent(s): 7e12824

progress tracker

Browse files

Files changed (3) hide show

app.py +19 -25
data.py +5 -6
model.py +3 -6

app.py CHANGED Viewed

@@ -1,32 +1,27 @@
-from gradio import Blocks, Button, Checkbox, DownloadButton, Dropdown, Error, Examples, Image, HTML, Markdown, Tab, Textbox
 from model import ModelFactory
 from data import Data
-# Define scoring strategies
-SCORING = ["wt-marginals", "masked-marginals"]
 # Get available models
 MODELS = ModelFactory.models()
-def app(*argv):
     "Main application function"
-    # Unpack the arguments
-    seq, trg, model_name, *_ = argv
-    scoring = SCORING[scoring_strategy.value]
     # Validate the input
     if 1 > len(seq):
         raise Error("Sequence cannot be empty")
-    if 1 > len(trg):
         raise Error("Substitutions cannot be empty")
     # Calculate the data based on the input parameters
     try:
-        data = Data(seq, trg, model_name, scoring).calculate()
         if isinstance(data.image, str):
             return ( Image(value=data.image, type='filepath', visible=True)
-                   , HTML(visible=False)
                    , DownloadButton(value=data.csv, visible=True) )
         else:
             return ( Image(visible=False)
@@ -45,19 +40,20 @@ with Blocks() as demo:
                      , label="Sequence"
                      , placeholder="FASTA sequence here..."
                      , value='' )
-        trg = Textbox( lines=1
                      , label="Substitutions"
                      , placeholder="Substitutions here..."
                      , value='' )
-        model_name = Dropdown(MODELS, label="Model", value="facebook/esm2_t30_150M_UR50D")
-        scoring_strategy = Checkbox(value=True, label="Use higher accuracy scoring", interactive=True)
-        btn = Button(value="Run", variant="primary")
-        dlb = DownloadButton(label="Download raw data", visible=False)
-        out = Image(visible=False)
-        ouu = HTML(visible=False)
-        btn.click( fn=app
-                 , inputs=[seq, trg, model_name]
-                 , outputs=[out, ouu, dlb] )
         ex = Examples(
             examples=[
                 [   "MVEQYLLEAIVRDARDGITISDCSRPDNPLVFVNDAFTRMTGYDAEEVIGKNCRFLQRGDINLSAVHTIKIAMLTHEPCLVTLKNYRKDGTIFWNELSLTPIINKNGLITHYLGIQKDVSAQVILNQTLHEENHLLKSNKEMLEYLVNIDALTGLHNRRFLEDQLVIQWKLASRHINTITIFMIDIDYFKAFNDTYGHTAGDEALRTIAKTLNNCFMRGSDFVARYGGEEFTILAIGMTELQAHEYSTKLVQKIENLNIHHKGSPLGHLTISLGYSQANPQYHNDQNLVIEQADRALYSAKVEGKNRAVAYREQ"
@@ -73,10 +69,8 @@ with Blocks() as demo:
                 ,   "MVEQYLLEAIVRDARDGITISDCSRPDNPLVFVNDAFTRMTGYDAEEVIGKNCRFLQRGDINLSAVHTIKIAMLTHEPCLVTLKNYRKDGTIFWNELSLTPIINKNGLITHYLGIQKDVSAQVILNQTLHEENHLLKSNKEMLEYLVNIDALTGLHNRRFLEDQLVIQWKLASRHINTITIFMIDIDYFKAFNDTYGHTAGDEALRTIAKTLNNCFMWGSDFVARYGGEEFTILAIGMTELQAHEYSTKLVQKIENLNIHHKGSPLGHLTISLGYSQANPQYHNDQNLVIEQADRALYSAKVEGKNRAVAYREQ"
                 ,   "facebook/esm2_t33_650M_UR50D" ],
             ]
-          , inputs=[ seq
-                   , trg
-                   , model_name ]
-          , outputs=[out]
           , fn=app
           , cache_examples=False )
     with Tab("Instructions"):

+from gradio import Blocks, Button, Checkbox, DownloadButton, Dropdown, Error, Examples, Image, HTML, Markdown, Progress, Tab, Textbox
 from model import ModelFactory
 from data import Data
 # Get available models
 MODELS = ModelFactory.models()
+def app(seq, sub, model_name, acc):
     "Main application function"
+    scoring = "masked-marginals" if acc else "wt-marginals"
     # Validate the input
     if 1 > len(seq):
         raise Error("Sequence cannot be empty")
+    if 1 > len(sub):
         raise Error("Substitutions cannot be empty")
     # Calculate the data based on the input parameters
     try:
+        data = Data(seq, sub, model_name, scoring).calculate(progress)
         if isinstance(data.image, str):
             return ( Image(value=data.image, type='filepath', visible=True)
+                   , HTML()
                    , DownloadButton(value=data.csv, visible=True) )
         else:
             return ( Image(visible=False)
                      , label="Sequence"
                      , placeholder="FASTA sequence here..."
                      , value='' )
+        sub = Textbox( lines=1
                      , label="Substitutions"
                      , placeholder="Substitutions here..."
                      , value='' )
+        model_name  = Dropdown(MODELS, label="Model", value="facebook/esm2_t30_150M_UR50D")
+        acc_box     = Checkbox(value=True, label="Use higher accuracy scoring", interactive=True)
+        run_btn     = Button(value="Run", variant="primary")
+        dl_btn      = DownloadButton(label="Download raw data", visible=False)
+        progress    = Progress()
+        out_html    = HTML()
+        out_img     = Image(visible=False)
+        run_btn.click( fn=app
+                     , inputs=[seq, sub, model_name, acc_box]
+                     , outputs=[out_img, out_html, dl_btn] )
         ex = Examples(
             examples=[
                 [   "MVEQYLLEAIVRDARDGITISDCSRPDNPLVFVNDAFTRMTGYDAEEVIGKNCRFLQRGDINLSAVHTIKIAMLTHEPCLVTLKNYRKDGTIFWNELSLTPIINKNGLITHYLGIQKDVSAQVILNQTLHEENHLLKSNKEMLEYLVNIDALTGLHNRRFLEDQLVIQWKLASRHINTITIFMIDIDYFKAFNDTYGHTAGDEALRTIAKTLNNCFMRGSDFVARYGGEEFTILAIGMTELQAHEYSTKLVQKIENLNIHHKGSPLGHLTISLGYSQANPQYHNDQNLVIEQADRALYSAKVEGKNRAVAYREQ"
                 ,   "MVEQYLLEAIVRDARDGITISDCSRPDNPLVFVNDAFTRMTGYDAEEVIGKNCRFLQRGDINLSAVHTIKIAMLTHEPCLVTLKNYRKDGTIFWNELSLTPIINKNGLITHYLGIQKDVSAQVILNQTLHEENHLLKSNKEMLEYLVNIDALTGLHNRRFLEDQLVIQWKLASRHINTITIFMIDIDYFKAFNDTYGHTAGDEALRTIAKTLNNCFMWGSDFVARYGGEEFTILAIGMTELQAHEYSTKLVQKIENLNIHHKGSPLGHLTISLGYSQANPQYHNDQNLVIEQADRALYSAKVEGKNRAVAYREQ"
                 ,   "facebook/esm2_t33_650M_UR50D" ],
             ]
+          , inputs=[seq, sub, model_name]
+          , outputs=[out_img]
           , fn=app
           , cache_examples=False )
     with Tab("Instructions"):

data.py CHANGED Viewed

@@ -72,7 +72,7 @@ class Data:
         self.parse_seq(src)
         self.parse_sub(trg)
         self.scoring_strategy = scoring_strategy
-        self.token_probs = None
         self.out = pd.DataFrame(self.sub, columns=['0', self.model_name])
         self.out_img = f"{out_file}.png"
         self.out_csv = f"{out_file}.csv"
@@ -121,11 +121,9 @@ class Data:
     def concat_and_set_axis(self):
         return (pd.concat([(self.out.iloc[19*x:19*(x+1)]
                             .pipe(self.create_dataframe).sort_values(['0'], ascending=[True])
-                            .drop(["resi", '0'], axis=1)
-                            .astype(float)
                             .set_axis([ 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L'
-                                      , 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'])) for x in range(self.out.shape[0]//19)]
-                        , axis=1)
                         .set_axis([f'{a}{i}' for i, a in enumerate(self.seq, 1)], axis="columns"))
     def create_dataframe(self, df):
@@ -181,8 +179,9 @@ class Data:
             ax[i].set_xticklabels(ax[i].get_xticklabels(), rotation=90)
         fig.tight_layout()
-    def calculate(self):
         "run model and parse output"
         self.model.run_model(self)
         self.parse_output()
         return self

         self.parse_seq(src)
         self.parse_sub(trg)
         self.scoring_strategy = scoring_strategy
+        self.progress = None
         self.out = pd.DataFrame(self.sub, columns=['0', self.model_name])
         self.out_img = f"{out_file}.png"
         self.out_csv = f"{out_file}.csv"
     def concat_and_set_axis(self):
         return (pd.concat([(self.out.iloc[19*x:19*(x+1)]
                             .pipe(self.create_dataframe).sort_values(['0'], ascending=[True])
+                            .drop(["resi", '0'], axis=1).astype(float)
                             .set_axis([ 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L'
+                                      , 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'])) for x in range(self.out.shape[0]//19)], axis=1)
                         .set_axis([f'{a}{i}' for i, a in enumerate(self.seq, 1)], axis="columns"))
     def create_dataframe(self, df):
             ax[i].set_xticklabels(ax[i].get_xticklabels(), rotation=90)
         fig.tight_layout()
+    def calculate(self, progress):
         "run model and parse output"
+        self.progress = progress
         self.model.run_model(self)
         self.parse_output()
         return self

model.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from huggingface_hub import HfApi
 import torch
-from tqdm import tqdm
 from typing import Any
 from transformers import AutoTokenizer, AutoModelForMaskedLM
 from transformers.tokenization_utils_base import BatchEncoding
@@ -54,7 +53,7 @@ class ESMModel:
         if data.scoring_strategy.startswith("masked-marginals"):
             all_token_probs = []
             # For each token in the batch
-            for i in tqdm(range(batch_tokens.size()[1])):
                 # If the token is in the list of residues
                 if i in data.resi:
                     # Clone the batch tokens and mask the current token
@@ -73,9 +72,7 @@ class ESMModel:
         # Apply the label_row function to each row of the substitutions dataframe
         data.out[self.model_name] = data.sub.apply(
-            lambda row: label_row(
-                row['0']
-              , token_probs )
           , axis=1 )
 class E1Model:
@@ -96,7 +93,7 @@ class E1Model:
             self.scorer = E1Scorer(self.model, EncoderScoreMethod.WILDTYPE_MARGINAL)
         batch_size = 60 ## chunking to avoid OOM
         out = []
-        for chunk in tqdm([data.trg[i:i+batch_size] for i in range(0, len(data.trg), batch_size)]):
             scores = self.scorer.score(parent_sequence=data.seq, sequences=chunk)
             out.extend(s['score'] for s in scores)
         data.out[self.model_name] = out

 from huggingface_hub import HfApi
 import torch
 from typing import Any
 from transformers import AutoTokenizer, AutoModelForMaskedLM
 from transformers.tokenization_utils_base import BatchEncoding
         if data.scoring_strategy.startswith("masked-marginals"):
             all_token_probs = []
             # For each token in the batch
+            for i in data.progress.tqdm(range(batch_tokens.size()[1]), desc="Calculating"):
                 # If the token is in the list of residues
                 if i in data.resi:
                     # Clone the batch tokens and mask the current token
         # Apply the label_row function to each row of the substitutions dataframe
         data.out[self.model_name] = data.sub.apply(
+            lambda row: label_row(row['0'], token_probs)
           , axis=1 )
 class E1Model:
             self.scorer = E1Scorer(self.model, EncoderScoreMethod.WILDTYPE_MARGINAL)
         batch_size = 60 ## chunking to avoid OOM
         out = []
+        for chunk in data.progress.tqdm([data.trg[i:i+batch_size] for i in range(0, len(data.trg), batch_size)], desc="Calculating"):
             scores = self.scorer.score(parent_sequence=data.seq, sequences=chunk)
             out.extend(s['score'] for s in scores)
         data.out[self.model_name] = out