Enhance file handling: Update .gitignore and improve prediction output saving with user-defined filenames in my_utils.py

Browse files

Files changed (3)

.gitignore +2 -1
cli.py +5 -2
src/my_utils.py +19 -9

.gitignore CHANGED Viewed

@@ -2,4 +2,5 @@
 *.tab
 __pycache__/
 *.pyc
-/home/juan/ProteinLocationPredictor/notebooks/__pycache__

 *.tab
 __pycache__/
 *.pyc
+/home/juan/ProteinLocationPredictor/notebooks/__pycache__
+*.txt

cli.py CHANGED Viewed

@@ -5,6 +5,7 @@ This module provides a Tkinter-based GUI for loading FASTA files
 """
 import tkinter as tk
 from tkinter import Menu, filedialog, messagebox
 from src.my_utils import predict_with_prost, predict_with_esm
@@ -48,6 +49,7 @@ def run_prost():
         messagebox.showerror("Error", "Please load a FASTA file first.")
         return
     predict_with_prost(FASTA_FILE_PATH)
 def run_esm300():
     """
@@ -64,6 +66,7 @@ def run_esm300():
         return
     predict_with_esm(fasta_path = FASTA_FILE_PATH,
                      model = 'esmc_300m')
 def run_esm600():
     """
@@ -79,8 +82,8 @@ def run_esm600():
         messagebox.showerror("Error", "Please load a FASTA file first.")
         return
     predict_with_esm(fasta_path = FASTA_FILE_PATH,
-                     model = 'esmc_600m')
 def menu():
     """
     Displays the main GUI menu for the Protein Tools application.

 """
 import tkinter as tk
 from tkinter import Menu, filedialog, messagebox
+import torch
 from src.my_utils import predict_with_prost, predict_with_esm
         messagebox.showerror("Error", "Please load a FASTA file first.")
         return
     predict_with_prost(FASTA_FILE_PATH)
+    torch.cuda.empty_cache()
 def run_esm300():
     """
         return
     predict_with_esm(fasta_path = FASTA_FILE_PATH,
                      model = 'esmc_300m')
+    torch.cuda.empty_cache()
 def run_esm600():
     """
         messagebox.showerror("Error", "Please load a FASTA file first.")
         return
     predict_with_esm(fasta_path = FASTA_FILE_PATH,
+                     model = 'esmc_600m')
+    torch.cuda.empty_cache()
 def menu():
     """
     Displays the main GUI menu for the Protein Tools application.

src/my_utils.py CHANGED Viewed

@@ -458,10 +458,10 @@ def fetch_refseq_sequence(refseq_id : str):
         record = SeqIO.read(handle, "fasta")
         handle.close()
         return str(record.seq)
-    except Exception:
         url = f"https://www.rcsb.org/fasta/entry/{refseq_id}"
-        response = requests.get(url)
         if response.status_code == 200:
             try:
                 fasta_data = response.text
@@ -709,8 +709,13 @@ def predict_with_esm(fasta_path : str,
         predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
     # Save results
-    input_filename = os.path.splitext(os.path.basename(fasta_path))[0]
-    output_file = os.path.join(output_dir, f"{input_filename}_predictions.txt")
     print(f"Saving predictions to {output_file}...")
     save_predictions_to_txt(predictions_dict, output_file)
@@ -813,8 +818,8 @@ def fasta_to_seq(fasta_file: str) -> Optional[tuple[list[str], list[str]]]:
             print(f"Error reading {fasta_file}: {e}")
             return None
-def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[float]]],
-                           output_file: str) -> None:
     """
     Save predictions to a text file in the specified format.
@@ -823,7 +828,7 @@ def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[fl
         output_file: Path to the output text file
     """
     with open(output_file, 'w', encoding='utf-8') as f:
-        f.write("Sequence_ID,Predictions\n")  # Header
         for seq_id, (class_names, probabilities) in predictions_dict.items():
@@ -917,8 +922,13 @@ def predict_with_prost(fasta_path: str):
         predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
     # Save results
-    input_filename = os.path.splitext(os.path.basename(fasta_path))[0]
-    output_file = os.path.join(output_dir, f"{input_filename}_predictions.txt")
     print(f"Saving predictions to {output_file}...")
     save_predictions_to_txt(predictions_dict, output_file)

         record = SeqIO.read(handle, "fasta")
         handle.close()
         return str(record.seq)
+    except (HTTPError, ValueError) as e:
         url = f"https://www.rcsb.org/fasta/entry/{refseq_id}"
+        response = requests.get(url, timeout=10)
         if response.status_code == 200:
             try:
                 fasta_data = response.text
         predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
     # Save results
+    input_filename = f"{os.path.splitext(os.path.basename(fasta_path))[0]}_predictions.txt"
+    output_file = filedialog.asksaveasfilename(title="Save Predictions",
+                                               initialdir=output_dir,
+                                               initialfile=input_filename,
+                                               defaultextension=".txt",
+                                               filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
+                                               )
     print(f"Saving predictions to {output_file}...")
     save_predictions_to_txt(predictions_dict, output_file)
             print(f"Error reading {fasta_file}: {e}")
             return None
+def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[float]]],
+                            output_file: str) -> None:
     """
     Save predictions to a text file in the specified format.
         output_file: Path to the output text file
     """
     with open(output_file, 'w', encoding='utf-8') as f:
+        f.write("Sequence_ID,Prediction 1,Prediction 2,Prediction 3,Prediction 4,Prediction 5,Prediction 6\n")  # Header
         for seq_id, (class_names, probabilities) in predictions_dict.items():
         predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
     # Save results
+    input_filename = f"{os.path.splitext(os.path.basename(fasta_path))[0]}_predictions.txt"
+    output_file = filedialog.asksaveasfilename(title="Save Predictions",
+                                               initialdir=output_dir,
+                                               initialfile=input_filename,
+                                               defaultextension=".txt",
+                                               filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
+                                               )
     print(f"Saving predictions to {output_file}...")
     save_predictions_to_txt(predictions_dict, output_file)