jpuglia
/

ProteinLocationPredictor

Joblib

English

Model card Files Files and versions

xet

Community

jpuglia committed on Sep 13, 2025

Commit

2491d7e

1 Parent(s): 5b2ffea

Update GUI title and button text for clarity in Protein Location Predictor

Browse files

Files changed (2) hide show

gui.py +2 -2
src/my_utils.py +18 -28

gui.py CHANGED Viewed

@@ -104,7 +104,7 @@ def menu():
     # root window
     root = tk.Tk()
     root.geometry('320x200')
-    root.title('Protein Tools Menu')
     # create a menubar
     menubar = Menu(root)
@@ -123,7 +123,7 @@ def menu():
     help_menu.add_command(label='About...')
     menubar.add_cascade(label="Help", menu=help_menu, underline=0)
-    btn_prost = tk.Button(root, text="Predict with Prost",
                            command=run_prost) #Predict with Prost
     btn_prost.pack(pady=5)

     # root window
     root = tk.Tk()
     root.geometry('320x200')
+    root.title('Protein Location Predictor')
     # create a menubar
     menubar = Menu(root)
     help_menu.add_command(label='About...')
     menubar.add_cascade(label="Help", menu=help_menu, underline=0)
+    btn_prost = tk.Button(root, text="Predict with Prost T5",
                            command=run_prost) #Predict with Prost
     btn_prost.pack(pady=5)

src/my_utils.py CHANGED Viewed

@@ -49,27 +49,20 @@ from joblib import load
 import torch
-# Load one chunk of embeddings
 def load_emb(path: str, acc: list[str]) -> np.ndarray:
     """
-    Loads and processes embedding files from a specified directory.
-    For each accession in the provided list, this function loads the corresponding
-    NumPy `.npy` file from the given path, processes the embedding by averaging
-    over axes if necessary, and collects the results.
     Args:
-        path (str): Directory path containing the embedding `.npy` files.
         acc (list[str]): List of accession identifiers corresponding to the embedding files.
     Returns:
-        tuple[np.ndarray, np.ndarray]:
-            - A 2D NumPy array where each row is a processed embedding.
-            - A 1D NumPy array of accession identifiers corresponding to the embeddings.
     Raises:
         FileNotFoundError: If the specified path does not exist.
-    Notes:
-        - If an embedding has 3 dimensions, it is squeezed along axis 0 and then averaged over axis 0.
-        - If an embedding has 2 dimensions, it is averaged over axis 0.
-        - Otherwise, the embedding is used as is.
     """
     if not os.path.exists(path):
@@ -78,7 +71,6 @@ def load_emb(path: str, acc: list[str]) -> np.ndarray:
     total_files = len([f for f in os.listdir(path) if f.endswith('.npy')])
     x = []
-    y = []
     for a in tqdm(acc, desc = 'Cargando embeddings', total=total_files):
@@ -88,20 +80,20 @@ def load_emb(path: str, acc: list[str]) -> np.ndarray:
             emb = emb.squeeze(axis = 0)
             emb = emb.mean(axis = 0)
             x.append(emb)
-            y.append(a)
         elif len(emb.shape) == 2:
             emb = emb.mean(axis = 0)
             x.append(emb)
-            y.append(a)
         else:
             x.append(emb)
-            y.append(a)
     return np.vstack(x)
 def confusion(title : str, y_true: np.ndarray, y_pred: np.ndarray) -> None:
-    """    Plot a confusion matrix for the given true and predicted labels.
     Args:
         title (str): Title for the confusion matrix plot.
         y_true (np.ndarray): True labels.
@@ -125,17 +117,15 @@ def confusion(title : str, y_true: np.ndarray, y_pred: np.ndarray) -> None:
 def plot_umap(x: np.ndarray, y: np.ndarray, title: str) -> None:
     """
-    Plot a 2D UMAP projection of high-dimensional data with color-coded labels and hover information.
-    Args:
-        x (list[np.ndarray]): List of feature arrays to be concatenated and visualized.
-        y (list[str]): List of labels corresponding to each sample in x, used for coloring the scatter plot.
-        title (str): Title of the plot.
-        org (list[str]): List of organism or group identifiers for each sample, shown in hover data.
     Returns:
-        None: Displays an interactive UMAP scatter plot using Plotly.
     """
     reducer = umap.UMAP(n_neighbors=30, random_state=42)
     scaled_x = StandardScaler().fit_transform(x)

 import torch
 def load_emb(path: str, acc: list[str]) -> np.ndarray:
     """
+    Loads and processes embedding files from a specified directory for a list of accession identifiers.
+    Each embedding is expected to be stored as a .npy file named after its accession in the given path.
+    - If the embedding has 3 dimensions, it is squeezed along the first axis and then averaged along the next axis.
+    - If the embedding has 2 dimensions, it is averaged along the first axis.
+    - Otherwise, the embedding is used as is.
     Args:
+        path (str): Directory path where the embedding .npy files are stored.
         acc (list[str]): List of accession identifiers corresponding to the embedding files.
     Returns:
+        np.ndarray: A 2D array where each row corresponds to the processed embedding of an accession.
     Raises:
         FileNotFoundError: If the specified path does not exist.
     """
     if not os.path.exists(path):
     total_files = len([f for f in os.listdir(path) if f.endswith('.npy')])
     x = []
     for a in tqdm(acc, desc = 'Cargando embeddings', total=total_files):
             emb = emb.squeeze(axis = 0)
             emb = emb.mean(axis = 0)
             x.append(emb)
         elif len(emb.shape) == 2:
             emb = emb.mean(axis = 0)
             x.append(emb)
         else:
             x.append(emb)
     return np.vstack(x)
 def confusion(title : str, y_true: np.ndarray, y_pred: np.ndarray) -> None:
+    """
+    Plot a confusion matrix for the given true and predicted labels.
     Args:
         title (str): Title for the confusion matrix plot.
         y_true (np.ndarray): True labels.
 def plot_umap(x: np.ndarray, y: np.ndarray, title: str) -> None:
     """
+    Plots a 2D UMAP projection of high-dimensional data with class labels.
+    Parameters:
+        x (np.ndarray): The input feature matrix of shape (n_samples, n_features).
+        y (np.ndarray): The array of labels corresponding to each sample.
+        title (str): The title for the plot.
     Returns:
+        None: Displays a scatter plot of the UMAP embedding colored by label.
     """
     reducer = umap.UMAP(n_neighbors=30, random_state=42)
     scaled_x = StandardScaler().fit_transform(x)