Spaces:
Running
Running
File size: 1,622 Bytes
e7fe90a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | #!/usr/bin/env python3
import sys
from pathlib import Path
# Command-line interface: exactly three positional arguments are required
# (sys.argv[0] is the script name, hence the expected length of 4).
if len(sys.argv) != 4:
    # Passing a string to sys.exit() writes it to stderr and exits with
    # status 1, so the usage text no longer pollutes stdout.
    sys.exit(
        "Usage: make_combined_multimer_fasta.py <seqs_dir> <epitope_seq> <output_fasta>"
    )

seqs_dir = Path(sys.argv[1])  # directory that is searched for the input FASTA
epitope_raw = sys.argv[2]     # epitope text exactly as typed by the user
outfile = Path(sys.argv[3])   # path of the combined FASTA to write
# Canonicalise the epitope argument.
# An empty string or the word "none" (any letter case) means "no epitope";
# otherwise spaces are removed and commas become ':' so that comma-separated
# input is accepted as well.
epitope = epitope_raw.strip()
if not epitope or epitope.lower() == "none":
    epitope = None
else:
    # One translate() pass: ',' -> ':' and ' ' -> deleted.
    epitope = epitope.translate(str.maketrans({",": ":", " ": None}))
# Locate the input FASTA: seqs_dir must contain exactly one "*.fa" file.
fas = list(seqs_dir.glob("*.fa"))
if not fas:
    raise FileNotFoundError(f"No FASTA files found in {seqs_dir}")
if len(fas) != 1:
    raise RuntimeError(f"Expected exactly one FASTA in {seqs_dir}, found {len(fas)}")

(fa,) = fas     # the single match
base = fa.stem  # file name without its suffix; used to label output records
def format_seq(s):
    """Return *s* with the module-level ``epitope`` appended after a ':'.

    When ``epitope`` is ``None`` the sequence is returned unchanged.
    """
    return s if epitope is None else f"{s}:{epitope}"
def _iter_fasta_sequences(handle):
    """Yield the sequence string of every FASTA record in *handle*, in order.

    A record starts at a line beginning with ">" and runs until the next such
    line (or end of input). All non-blank lines in between are concatenated,
    so sequences wrapped over several lines are kept whole. Blank lines and
    any text before the first header are ignored. A header with no sequence
    lines yields an empty string.
    """
    parts = None  # None until the first header has been seen
    for raw_line in handle:
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            if parts is not None:
                yield "".join(parts)
            parts = []
        elif parts is not None:
            parts.append(line)
    if parts is not None:
        yield "".join(parts)


# Read every record BEFORE opening the output: opening with "w" truncates the
# file, which would destroy the input if both paths refer to the same file.
with open(fa) as f:
    sequences = list(_iter_fasta_sequences(f))

# Original headers are discarded; records are relabelled by position:
# the first one is the wild type, the rest are numbered designs (1, 2, ...).
with open(outfile, "w") as out:
    for record_index, seq in enumerate(sequences):
        if record_index == 0:
            new_header = f">{base}_WT"
        else:
            new_header = f">{base}_design_{record_index}"
        out.write(new_header + "\n")
        out.write(format_seq(seq) + "\n")

print(f"Wrote combined FASTA to {outfile}")
|