jpuglia committed on
Commit
f29fbd1
·
1 Parent(s): b73ccc2

Enhance file handling: Update .gitignore and improve prediction output saving with user-defined filenames in my_utils.py

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. cli.py +5 -2
  3. src/my_utils.py +19 -9
.gitignore CHANGED
@@ -2,4 +2,5 @@
2
  *.tab
3
  __pycache__/
4
  *.pyc
5
- /home/juan/ProteinLocationPredictor/notebooks/__pycache__
 
 
2
  *.tab
3
  __pycache__/
4
  *.pyc
5
+ /home/juan/ProteinLocationPredictor/notebooks/__pycache__
6
+ *.txt
cli.py CHANGED
@@ -5,6 +5,7 @@ This module provides a Tkinter-based GUI for loading FASTA files
5
  """
6
  import tkinter as tk
7
  from tkinter import Menu, filedialog, messagebox
 
8
  from src.my_utils import predict_with_prost, predict_with_esm
9
 
10
 
@@ -48,6 +49,7 @@ def run_prost():
48
  messagebox.showerror("Error", "Please load a FASTA file first.")
49
  return
50
  predict_with_prost(FASTA_FILE_PATH)
 
51
 
52
  def run_esm300():
53
  """
@@ -64,6 +66,7 @@ def run_esm300():
64
  return
65
  predict_with_esm(fasta_path = FASTA_FILE_PATH,
66
  model = 'esmc_300m')
 
67
 
68
  def run_esm600():
69
  """
@@ -79,8 +82,8 @@ def run_esm600():
79
  messagebox.showerror("Error", "Please load a FASTA file first.")
80
  return
81
  predict_with_esm(fasta_path = FASTA_FILE_PATH,
82
- model = 'esmc_600m')
83
-
84
  def menu():
85
  """
86
  Displays the main GUI menu for the Protein Tools application.
 
5
  """
6
  import tkinter as tk
7
  from tkinter import Menu, filedialog, messagebox
8
+ import torch
9
  from src.my_utils import predict_with_prost, predict_with_esm
10
 
11
 
 
49
  messagebox.showerror("Error", "Please load a FASTA file first.")
50
  return
51
  predict_with_prost(FASTA_FILE_PATH)
52
+ torch.cuda.empty_cache()
53
 
54
  def run_esm300():
55
  """
 
66
  return
67
  predict_with_esm(fasta_path = FASTA_FILE_PATH,
68
  model = 'esmc_300m')
69
+ torch.cuda.empty_cache()
70
 
71
  def run_esm600():
72
  """
 
82
  messagebox.showerror("Error", "Please load a FASTA file first.")
83
  return
84
  predict_with_esm(fasta_path = FASTA_FILE_PATH,
85
+ model = 'esmc_600m')
86
+ torch.cuda.empty_cache()
87
  def menu():
88
  """
89
  Displays the main GUI menu for the Protein Tools application.
src/my_utils.py CHANGED
@@ -458,10 +458,10 @@ def fetch_refseq_sequence(refseq_id : str):
458
  record = SeqIO.read(handle, "fasta")
459
  handle.close()
460
  return str(record.seq)
461
- except Exception:
462
 
463
  url = f"https://www.rcsb.org/fasta/entry/{refseq_id}"
464
- response = requests.get(url)
465
  if response.status_code == 200:
466
  try:
467
  fasta_data = response.text
@@ -709,8 +709,13 @@ def predict_with_esm(fasta_path : str,
709
  predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
710
 
711
  # Save results
712
- input_filename = os.path.splitext(os.path.basename(fasta_path))[0]
713
- output_file = os.path.join(output_dir, f"{input_filename}_predictions.txt")
 
 
 
 
 
714
 
715
  print(f"Saving predictions to {output_file}...")
716
  save_predictions_to_txt(predictions_dict, output_file)
@@ -813,8 +818,8 @@ def fasta_to_seq(fasta_file: str) -> Optional[tuple[list[str], list[str]]]:
813
  print(f"Error reading {fasta_file}: {e}")
814
  return None
815
 
816
- def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[float]]],
817
- output_file: str) -> None:
818
  """
819
  Save predictions to a text file in the specified format.
820
 
@@ -823,7 +828,7 @@ def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[fl
823
  output_file: Path to the output text file
824
  """
825
  with open(output_file, 'w', encoding='utf-8') as f:
826
- f.write("Sequence_ID,Predictions\n") # Header
827
 
828
  for seq_id, (class_names, probabilities) in predictions_dict.items():
829
 
@@ -917,8 +922,13 @@ def predict_with_prost(fasta_path: str):
917
  predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
918
 
919
  # Save results
920
- input_filename = os.path.splitext(os.path.basename(fasta_path))[0]
921
- output_file = os.path.join(output_dir, f"{input_filename}_predictions.txt")
 
 
 
 
 
922
 
923
  print(f"Saving predictions to {output_file}...")
924
  save_predictions_to_txt(predictions_dict, output_file)
 
458
  record = SeqIO.read(handle, "fasta")
459
  handle.close()
460
  return str(record.seq)
461
+ except (HTTPError, ValueError) as e:
462
 
463
  url = f"https://www.rcsb.org/fasta/entry/{refseq_id}"
464
+ response = requests.get(url, timeout=10)
465
  if response.status_code == 200:
466
  try:
467
  fasta_data = response.text
 
709
  predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
710
 
711
  # Save results
712
+ input_filename = f"{os.path.splitext(os.path.basename(fasta_path))[0]}_predictions.txt"
713
+ output_file = filedialog.asksaveasfilename(title="Save Predictions",
714
+ initialdir=output_dir,
715
+ initialfile=input_filename,
716
+ defaultextension=".txt",
717
+ filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
718
+ )
719
 
720
  print(f"Saving predictions to {output_file}...")
721
  save_predictions_to_txt(predictions_dict, output_file)
 
818
  print(f"Error reading {fasta_file}: {e}")
819
  return None
820
 
821
+ def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[float]]],
822
+ output_file: str) -> None:
823
  """
824
  Save predictions to a text file in the specified format.
825
 
 
828
  output_file: Path to the output text file
829
  """
830
  with open(output_file, 'w', encoding='utf-8') as f:
831
+ f.write("Sequence_ID,Prediction 1,Prediction 2,Prediction 3,Prediction 4,Prediction 5,Prediction 6\n") # Header
832
 
833
  for seq_id, (class_names, probabilities) in predictions_dict.items():
834
 
 
922
  predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
923
 
924
  # Save results
925
+ input_filename = f"{os.path.splitext(os.path.basename(fasta_path))[0]}_predictions.txt"
926
+ output_file = filedialog.asksaveasfilename(title="Save Predictions",
927
+ initialdir=output_dir,
928
+ initialfile=input_filename,
929
+ defaultextension=".txt",
930
+ filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
931
+ )
932
 
933
  print(f"Saving predictions to {output_file}...")
934
  save_predictions_to_txt(predictions_dict, output_file)