File size: 614 Bytes
f34af6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from Bio.SeqIO.FastaIO import FastaIterator
import os

def list_files_in_dir(dir, extension=".pdb"):
    """Return sorted absolute paths of the files in ``dir`` ending in ``extension``.

    Only the directory itself is inspected; sub-directories are not
    traversed. Entries that are not regular files (for example a
    sub-directory whose name happens to end with ``extension``) are skipped,
    so every returned path can be passed to ``open()``.

    Args:
        dir: Path of the directory to list. The name shadows the ``dir``
            builtin but is kept so existing keyword callers keep working.
        extension: Suffix a file name must end with. The comparison is
            case-sensitive.

    Returns:
        A list of absolute paths, sorted in ascending string order.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``dir`` does not exist
            or cannot be read.
    """
    # os.listdir yields bare entry names, so joining them onto the absolute
    # directory once gives the same result as calling abspath per entry.
    base_dir = os.path.abspath(dir)
    candidates = (
        os.path.join(base_dir, filename)
        for filename in os.listdir(dir)
        if filename.endswith(extension)
    )
    return sorted(path for path in candidates if os.path.isfile(path))

def extract_seqs_from_dir(dir, extension=".fa"):
    """Collect every sequence from the matching FASTA files in ``dir``.

    The files are found with ``list_files_in_dir(dir, extension)`` and read
    in the order that function returns them. Each file is parsed with
    ``FastaIterator`` and every record's ``seq`` is converted to ``str``.

    Args:
        dir: Directory to search for sequence files.
        extension: File-name suffix used to select files.

    Returns:
        A flat list of sequence strings, in file order and then in record
        order within each file.
    """
    collected = []
    for fasta_path in list_files_in_dir(dir, extension):
        with open(fasta_path, "r") as handle:
            for record in FastaIterator(handle):
                collected.append(str(record.seq))
    return collected