Enhance file handling: Update .gitignore and improve prediction output saving with user-defined filenames in my_utils.py
Browse files
- .gitignore +2 -1
- cli.py +5 -2
- src/my_utils.py +19 -9
.gitignore
CHANGED
|
@@ -2,4 +2,5 @@
|
|
| 2 |
*.tab
|
| 3 |
__pycache__/
|
| 4 |
*.pyc
|
| 5 |
-
/home/juan/ProteinLocationPredictor/notebooks/__pycache__
|
|
|
|
|
|
| 2 |
*.tab
|
| 3 |
__pycache__/
|
| 4 |
*.pyc
|
| 5 |
+
/home/juan/ProteinLocationPredictor/notebooks/__pycache__
|
| 6 |
+
*.txt
|
cli.py
CHANGED
|
@@ -5,6 +5,7 @@ This module provides a Tkinter-based GUI for loading FASTA files
|
|
| 5 |
"""
|
| 6 |
import tkinter as tk
|
| 7 |
from tkinter import Menu, filedialog, messagebox
|
|
|
|
| 8 |
from src.my_utils import predict_with_prost, predict_with_esm
|
| 9 |
|
| 10 |
|
|
@@ -48,6 +49,7 @@ def run_prost():
|
|
| 48 |
messagebox.showerror("Error", "Please load a FASTA file first.")
|
| 49 |
return
|
| 50 |
predict_with_prost(FASTA_FILE_PATH)
|
|
|
|
| 51 |
|
| 52 |
def run_esm300():
|
| 53 |
"""
|
|
@@ -64,6 +66,7 @@ def run_esm300():
|
|
| 64 |
return
|
| 65 |
predict_with_esm(fasta_path = FASTA_FILE_PATH,
|
| 66 |
model = 'esmc_300m')
|
|
|
|
| 67 |
|
| 68 |
def run_esm600():
|
| 69 |
"""
|
|
@@ -79,8 +82,8 @@ def run_esm600():
|
|
| 79 |
messagebox.showerror("Error", "Please load a FASTA file first.")
|
| 80 |
return
|
| 81 |
predict_with_esm(fasta_path = FASTA_FILE_PATH,
|
| 82 |
-
model = 'esmc_600m')
|
| 83 |
-
|
| 84 |
def menu():
|
| 85 |
"""
|
| 86 |
Displays the main GUI menu for the Protein Tools application.
|
|
|
|
| 5 |
"""
|
| 6 |
import tkinter as tk
|
| 7 |
from tkinter import Menu, filedialog, messagebox
|
| 8 |
+
import torch
|
| 9 |
from src.my_utils import predict_with_prost, predict_with_esm
|
| 10 |
|
| 11 |
|
|
|
|
| 49 |
messagebox.showerror("Error", "Please load a FASTA file first.")
|
| 50 |
return
|
| 51 |
predict_with_prost(FASTA_FILE_PATH)
|
| 52 |
+
torch.cuda.empty_cache()
|
| 53 |
|
| 54 |
def run_esm300():
|
| 55 |
"""
|
|
|
|
| 66 |
return
|
| 67 |
predict_with_esm(fasta_path = FASTA_FILE_PATH,
|
| 68 |
model = 'esmc_300m')
|
| 69 |
+
torch.cuda.empty_cache()
|
| 70 |
|
| 71 |
def run_esm600():
|
| 72 |
"""
|
|
|
|
| 82 |
messagebox.showerror("Error", "Please load a FASTA file first.")
|
| 83 |
return
|
| 84 |
predict_with_esm(fasta_path = FASTA_FILE_PATH,
|
| 85 |
+
model = 'esmc_600m')
|
| 86 |
+
torch.cuda.empty_cache()
|
| 87 |
def menu():
|
| 88 |
"""
|
| 89 |
Displays the main GUI menu for the Protein Tools application.
|
src/my_utils.py
CHANGED
|
@@ -458,10 +458,10 @@ def fetch_refseq_sequence(refseq_id : str):
|
|
| 458 |
record = SeqIO.read(handle, "fasta")
|
| 459 |
handle.close()
|
| 460 |
return str(record.seq)
|
| 461 |
-
except
|
| 462 |
|
| 463 |
url = f"https://www.rcsb.org/fasta/entry/{refseq_id}"
|
| 464 |
-
response = requests.get(url)
|
| 465 |
if response.status_code == 200:
|
| 466 |
try:
|
| 467 |
fasta_data = response.text
|
|
@@ -709,8 +709,13 @@ def predict_with_esm(fasta_path : str,
|
|
| 709 |
predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
|
| 710 |
|
| 711 |
# Save results
|
| 712 |
-
input_filename = os.path.splitext(os.path.basename(fasta_path))[0]
|
| 713 |
-
output_file =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
|
| 715 |
print(f"Saving predictions to {output_file}...")
|
| 716 |
save_predictions_to_txt(predictions_dict, output_file)
|
|
@@ -813,8 +818,8 @@ def fasta_to_seq(fasta_file: str) -> Optional[tuple[list[str], list[str]]]:
|
|
| 813 |
print(f"Error reading {fasta_file}: {e}")
|
| 814 |
return None
|
| 815 |
|
| 816 |
-
def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[float]]],
|
| 817 |
-
|
| 818 |
"""
|
| 819 |
Save predictions to a text file in the specified format.
|
| 820 |
|
|
@@ -823,7 +828,7 @@ def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[fl
|
|
| 823 |
output_file: Path to the output text file
|
| 824 |
"""
|
| 825 |
with open(output_file, 'w', encoding='utf-8') as f:
|
| 826 |
-
f.write("Sequence_ID,
|
| 827 |
|
| 828 |
for seq_id, (class_names, probabilities) in predictions_dict.items():
|
| 829 |
|
|
@@ -917,8 +922,13 @@ def predict_with_prost(fasta_path: str):
|
|
| 917 |
predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
|
| 918 |
|
| 919 |
# Save results
|
| 920 |
-
input_filename = os.path.splitext(os.path.basename(fasta_path))[0]
|
| 921 |
-
output_file =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
|
| 923 |
print(f"Saving predictions to {output_file}...")
|
| 924 |
save_predictions_to_txt(predictions_dict, output_file)
|
|
|
|
| 458 |
record = SeqIO.read(handle, "fasta")
|
| 459 |
handle.close()
|
| 460 |
return str(record.seq)
|
| 461 |
+
except (HTTPError, ValueError) as e:
|
| 462 |
|
| 463 |
url = f"https://www.rcsb.org/fasta/entry/{refseq_id}"
|
| 464 |
+
response = requests.get(url, timeout=10)
|
| 465 |
if response.status_code == 200:
|
| 466 |
try:
|
| 467 |
fasta_data = response.text
|
|
|
|
| 709 |
predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
|
| 710 |
|
| 711 |
# Save results
|
| 712 |
+
input_filename = f"{os.path.splitext(os.path.basename(fasta_path))[0]}_predictions.txt"
|
| 713 |
+
output_file = filedialog.asksaveasfilename(title="Save Predictions",
|
| 714 |
+
initialdir=output_dir,
|
| 715 |
+
initialfile=input_filename,
|
| 716 |
+
defaultextension=".txt",
|
| 717 |
+
filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
|
| 718 |
+
)
|
| 719 |
|
| 720 |
print(f"Saving predictions to {output_file}...")
|
| 721 |
save_predictions_to_txt(predictions_dict, output_file)
|
|
|
|
| 818 |
print(f"Error reading {fasta_file}: {e}")
|
| 819 |
return None
|
| 820 |
|
| 821 |
+
def save_predictions_to_txt(predictions_dict: dict[str, tuple[list[str], list[float]]],
|
| 822 |
+
output_file: str) -> None:
|
| 823 |
"""
|
| 824 |
Save predictions to a text file in the specified format.
|
| 825 |
|
|
|
|
| 828 |
output_file: Path to the output text file
|
| 829 |
"""
|
| 830 |
with open(output_file, 'w', encoding='utf-8') as f:
|
| 831 |
+
f.write("Sequence_ID,Prediction 1,Prediction 2,Prediction 3,Prediction 4,Prediction 5,Prediction 6\n") # Header
|
| 832 |
|
| 833 |
for seq_id, (class_names, probabilities) in predictions_dict.items():
|
| 834 |
|
|
|
|
| 922 |
predictions_dict[seq_id] = (list(sorted_classes), list(sorted_probs))
|
| 923 |
|
| 924 |
# Save results
|
| 925 |
+
input_filename = f"{os.path.splitext(os.path.basename(fasta_path))[0]}_predictions.txt"
|
| 926 |
+
output_file = filedialog.asksaveasfilename(title="Save Predictions",
|
| 927 |
+
initialdir=output_dir,
|
| 928 |
+
initialfile=input_filename,
|
| 929 |
+
defaultextension=".txt",
|
| 930 |
+
filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
|
| 931 |
+
)
|
| 932 |
|
| 933 |
print(f"Saving predictions to {output_file}...")
|
| 934 |
save_predictions_to_txt(predictions_dict, output_file)
|