Spaces:

Valmbd
/

Petimot

Running

App Files Files Community

Valmbd committed about 1 month ago

Commit

c8ad6f1

verified ·

1 Parent(s): 8bdea01

Update data_loader to read directly from zip

Browse files

Files changed (1) hide show

app/utils/data_loader.py +52 -4

app/utils/data_loader.py CHANGED Viewed

@@ -6,8 +6,16 @@ from pathlib import Path
 from functools import lru_cache
 def find_predictions_dir(root: str) -> str | None:
-    """Find the predictions directory (most recent model)."""
     pred_root = os.path.join(root, "predictions")
     if not os.path.isdir(pred_root):
         return None
@@ -22,17 +30,38 @@ def find_predictions_dir(root: str) -> str | None:
 def load_prediction_index(pred_dir: str) -> pd.DataFrame:
     """Build index of all predicted proteins with metadata."""
     rows = []
-    mode_files = glob.glob(os.path.join(pred_dir, "*_mode_0.txt"))
     for mf in mode_files:
         base = os.path.basename(mf).replace("_mode_0.txt", "")
-        # Load mode 0 for stats
         try:
             vecs = np.loadtxt(mf)
             n_res = len(vecs)
             mag = np.linalg.norm(vecs, axis=1)
-            # Count available modes
             n_modes = 0
             for k in range(10):
                 if os.path.exists(os.path.join(pred_dir, f"{base}_mode_{k}.txt")):
@@ -57,6 +86,25 @@ def load_prediction_index(pred_dir: str) -> pd.DataFrame:
 def load_modes(pred_dir: str, name: str) -> dict[int, np.ndarray]:
     """Load all mode files for a protein."""
     modes = {}
     for k in range(10):
         for pfx in [f"extracted_{name}", name]:
             mf = os.path.join(pred_dir, f"{pfx}_mode_{k}.txt")

 from functools import lru_cache
+def get_predictions_zip(root: str) -> str | None:
+    """Find predictions.zip in the root directory."""
+    zip_path = os.path.join(root, "predictions.zip")
+    return zip_path if os.path.exists(zip_path) else None
 def find_predictions_dir(root: str) -> str | None:
+    """Find the predictions directory (most recent model) or zip."""
+    if get_predictions_zip(root):
+        return root  # Signal that we have a zip
     pred_root = os.path.join(root, "predictions")
     if not os.path.isdir(pred_root):
         return None
 def load_prediction_index(pred_dir: str) -> pd.DataFrame:
     """Build index of all predicted proteins with metadata."""
     rows = []
+    zip_path = get_predictions_zip(pred_dir)
+    if zip_path:
+        import zipfile
+        with zipfile.ZipFile(zip_path, 'r') as zf:
+            idx_file = next((f for f in zf.namelist() if f.endswith("index.json")), None)
+            if idx_file:
+                with zf.open(idx_file) as f:
+                    index_dict = json.load(f)
+                    for k, v in index_dict.items():
+                        rows.append({
+                            "name": k,
+                            "seq_len": v.get("seq_len", 0),
+                            "n_modes": v.get("n_modes", 0),
+                            "mean_disp_m0": v.get("mean_disp", 0.0),
+                            "max_disp_m0": v.get("max_disp", 0.0),
+                            "top_residue": -1,
+                        })
+                    return pd.DataFrame(rows).sort_values("name").reset_index(drop=True)
+    # Fallback to loose files
+    if not os.path.isdir(pred_dir):
+        return pd.DataFrame()
+    mode_files = glob.glob(os.path.join(pred_dir, "*_mode_0.txt"))
     for mf in mode_files:
         base = os.path.basename(mf).replace("_mode_0.txt", "")
         try:
             vecs = np.loadtxt(mf)
             n_res = len(vecs)
             mag = np.linalg.norm(vecs, axis=1)
             n_modes = 0
             for k in range(10):
                 if os.path.exists(os.path.join(pred_dir, f"{base}_mode_{k}.txt")):
 def load_modes(pred_dir: str, name: str) -> dict[int, np.ndarray]:
     """Load all mode files for a protein."""
     modes = {}
+    zip_path = get_predictions_zip(pred_dir)
+    if zip_path:
+        import zipfile
+        with zipfile.ZipFile(zip_path, 'r') as zf:
+            namelist = zf.namelist()
+            for k in range(10):
+                for pfx in [f"extracted_{name}", name]:
+                    suffix = f"{pfx}_mode_{k}.txt"
+                    # Fast check if any path ends with suffix
+                    matched = next((f for f in namelist if f.endswith(f"/{suffix}") or f == suffix), None)
+                    if matched:
+                        with zf.open(matched) as f:
+                            modes[k] = np.loadtxt(f)
+                        break
+        if modes:
+            return modes
+    # Fallback for loose files
     for k in range(10):
         for pfx in [f"extracted_{name}", name]:
             mf = os.path.join(pred_dir, f"{pfx}_mode_{k}.txt")