File size: 614 Bytes
f34af6f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | from Bio.SeqIO.FastaIO import FastaIterator
import os
def list_files_in_dir(dir, extension=".pdb"):
    """Return the sorted absolute paths of files in ``dir`` ending in ``extension``.

    The scan is non-recursive. Only regular files (or symlinks resolving to
    regular files) are reported: a sub-directory whose name happens to end
    with ``extension`` is skipped, so every returned path can be opened.

    Args:
        dir: Path of the directory to scan. The name shadows the ``dir``
            builtin but is kept so existing keyword callers keep working.
        extension: Filename suffix to match. Anything accepted by
            ``str.endswith`` works, including a tuple of suffixes.

    Returns:
        A list of absolute path strings, sorted lexicographically.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``dir`` cannot be listed
            (e.g. it does not exist or is not a directory).
    """
    paths = []
    for filename in os.listdir(dir):
        # Test the cheap suffix first so non-matching entries cost no
        # path manipulation or filesystem access.
        if not filename.endswith(extension):
            continue
        full_path = os.path.abspath(os.path.join(dir, filename))
        # Directories can carry the suffix too; callers expect openable files.
        if os.path.isfile(full_path):
            paths.append(full_path)
    paths.sort()
    return paths
def extract_seqs_from_dir(dir, extension=".fa"):
    """Collect every sequence from the FASTA files found in ``dir``.

    Files are selected with ``list_files_in_dir(dir, extension)`` and read in
    the order that function returns them. Each file is parsed with
    ``FastaIterator`` and the ``seq`` of every record is converted to ``str``.

    Args:
        dir: Path of the directory holding the FASTA files.
        extension: Filename suffix used to select files (default ``".fa"``).

    Returns:
        A flat list of sequence strings, in file order and then record order.
    """
    collected = []
    for fasta_path in list_files_in_dir(dir, extension):
        with open(fasta_path, "r") as handle:
            for record in FastaIterator(handle):
                collected.append(str(record.seq))
    return collected