Spaces:

sagawa
/

PLTNUM

Sleeping

App Files Files Community

sagawa committed on Aug 28, 2024

Commit

42d0bd4

verified ·

1 Parent(s): 20c06cf

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -31

app.py CHANGED Viewed

@@ -30,31 +30,35 @@ class Config:
 def predict_stability_with_pdb(model_choice, organism_choice, pdb_files, cfg=Config()):
-    results = []
     for pdb_file in pdb_files:
         try:
             pdb_path = pdb_file.name
             os.system("chmod 777 bin/foldseek")
             sequences = get_foldseek_seq(pdb_path)
             if not sequences:
-                results.append({"file_name": pdb_path,
-                                "raw prediction value": None,
-                                "binary prediction value": None
-                                })
                 continue
             sequence = sequences[2] if model_choice == "SaProt" else sequences[0]
-            output = predict_stability_core(model_choice, organism_choice, sequence, cfg)
-            results.append({"file_name": pdb_path,
-                            "raw prediction value": output["raw prediction values"][0],
-                            "binary prediction value": output["binary prediction values"][0]
-                            })
         except Exception as e:
-            results.append({"file_name": pdb_file.name,
-                            "raw prediction value": None,
-                            "binary prediction value": None
-                            })
     df = pd.DataFrame(results)
     output_csv = "/tmp/predictions.csv"
@@ -72,13 +76,13 @@ def predict_stability_with_sequence(model_choice, organism_choice, sequence, cfg
         return f"An error occurred: {str(e)}"
-def predict_stability_core(model_choice, organism_choice, sequence, cfg=Config()):
     cell_line = "HeLa" if organism_choice == "Human" else "NIH3T3"
     cfg.model = f"sagawa/PLTNUM-{model_choice}-{cell_line}"
     cfg.architecture = model_choice
     cfg.model_path = f"sagawa/PLTNUM-{model_choice}-{cell_line}"
-    output = predict(cfg, sequence)
     return output
@@ -92,7 +96,7 @@ def get_foldseek_seq(pdb_path):
     return parsed_seqs
-def predict(cfg, sequence):
     cfg.token_length = 2 if cfg.architecture == "SaProt" else 1
     cfg.device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -100,7 +104,7 @@ def predict(cfg, sequence):
         cfg.max_length += 1
     seed_everything(cfg.seed)
-    df = pd.DataFrame({cfg.sequence_col: [sequence]})
     tokenizer = AutoTokenizer.from_pretrained(
         cfg.model_path, padding_side=cfg.padding_side
@@ -134,19 +138,8 @@ def predict(cfg, sequence):
         predictions += preds.cpu().tolist()
     predictions = list(itertools.chain.from_iterable(predictions))
-    outputs = {
-        "raw prediction values": predictions,
-        "binary prediction values": [1 if x > 0.5 else 0 for x in predictions]
-    }
-    html_output = f"""
-    <div style='border: 2px solid #4CAF50; padding: 10px; border-radius: 10px;'>
-        <p><strong>Raw prediction value:</strong> {outputs['raw prediction values'][0]}</p>
-        <p><strong>Binary prediction values:</strong> {outputs['binary prediction values'][0]}</p>
-    </div>
-    """
-    return html_output
 # Gradio Interface

 def predict_stability_with_pdb(model_choice, organism_choice, pdb_files, cfg=Config()):
+    results = {"file_name": [],
+               "raw prediction value": [],
+               "binary prediction value": []
+               }
+    file_names = []
+    sequences = []
     for pdb_file in pdb_files:
         try:
             pdb_path = pdb_file.name
             os.system("chmod 777 bin/foldseek")
             sequences = get_foldseek_seq(pdb_path)
             if not sequences:
+                results["file_name"].append(pdb_file.name)
+                results["raw prediction value"].append(None)
+                results["binary prediction value"].append(None)
                 continue
             sequence = sequences[2] if model_choice == "SaProt" else sequences[0]
+            file_names.append(pdb_file.name)
+            sequences.append(sequence)
         except Exception as e:
+                results["file_name"].append(pdb_file.name)
+                results["raw prediction value"].append(None)
+                results["binary prediction value"].append(None)
+    raw_prediction, binary_prediction = predict_stability_core(model_choice, organism_choice, sequences, cfg)
+    results["file_name"] = results["file_name"] + file_names
+    results["raw prediction value"] = results["raw prediction value"] + raw_prediction
+    results["binary prediction value"] = results["binary prediction value"] + binary_prediction
     df = pd.DataFrame(results)
     output_csv = "/tmp/predictions.csv"
         return f"An error occurred: {str(e)}"
+def predict_stability_core(model_choice, organism_choice, sequences, cfg=Config()):
     cell_line = "HeLa" if organism_choice == "Human" else "NIH3T3"
     cfg.model = f"sagawa/PLTNUM-{model_choice}-{cell_line}"
     cfg.architecture = model_choice
     cfg.model_path = f"sagawa/PLTNUM-{model_choice}-{cell_line}"
+    output = predict(cfg, sequences)
     return output
     return parsed_seqs
+def predict(cfg, sequences):
     cfg.token_length = 2 if cfg.architecture == "SaProt" else 1
     cfg.device = "cuda" if torch.cuda.is_available() else "cpu"
         cfg.max_length += 1
     seed_everything(cfg.seed)
+    df = pd.DataFrame({cfg.sequence_col: sequences})
     tokenizer = AutoTokenizer.from_pretrained(
         cfg.model_path, padding_side=cfg.padding_side
         predictions += preds.cpu().tolist()
     predictions = list(itertools.chain.from_iterable(predictions))
+    return predictions, [1 if x > 0.5 else 0 for x in predictions]
 # Gradio Interface