import re

import numpy as np
from PIL import Image

# Every uppercase ASCII letter other than A, C, G, T and N is mapped to "N".
_NON_ACGTN_TO_N = str.maketrans(dict.fromkeys("BDEFHIJKLMOPQRSUVWXYZ", "N"))


def clean_seq(seq: str) -> str:
    """Upper-case *seq* and replace every letter other than A, C, G, T, N by "N".

    Characters that are not ASCII letters (digits, gaps, whitespace, ...)
    are left untouched; the length of the sequence is preserved.
    """
    # A single translate pass replaces the 21 chained ``str.replace`` calls.
    return seq.upper().translate(_NON_ACGTN_TO_N)


def array2img(array):
    """Convert an FCGR array to a grayscale PIL image.

    The values are min-max rescaled to [0, 1] and then inverted, so the
    minimum of *array* maps to 255 and the maximum maps to 0.

    Args:
        array: numeric numpy array (must be non-empty, since ``min``/``max``
            are taken over it).

    Returns:
        A ``PIL.Image.Image`` in mode ``"L"``.
    """
    max_color = 255
    lowest, highest = array.min(), array.max()
    span = highest - lowest

    if span == 0:
        # Constant array: avoid 0/0 (NaN pixels). Every value equals the
        # minimum, so every pixel gets the colour of the minimum.
        img_rescaled = np.zeros_like(array, dtype=float)
    else:
        # rescale to [0, 1]
        img_rescaled = (array - lowest) / span

    # invert colors black->white
    img_array = np.ceil(max_color - img_rescaled * max_color)
    # Pixel values span 0..255, which needs an unsigned 8-bit type
    # (int8 only covers -128..127).
    img_array = img_array.astype(np.uint8)

    # convert to Image
    return Image.fromarray(img_array, "L")


def count_seqs(fasta) -> int:
    """Count the lines of *fasta* that contain ">" (for use with a progress bar).

    Args:
        fasta: iterable of text lines, e.g. an open FASTA file handle.

    Returns:
        Number of lines in which ">" occurs.
    """
    # A plain substring test is equivalent to searching for the literal ">".
    return sum(1 for line in fasta if ">" in line)


def generate_fcgr(kmer, fasta, fcgr):
    """Generate the FCGR image of one record.

    Args:
        kmer: not used by this function; kept so existing callers keep working.
        fasta: object exposing the sequence through a ``seq`` attribute.
        fcgr: callable that receives the cleaned sequence string and returns
            the array handed to ``array2img``.

    Returns:
        The grayscale image produced by ``array2img``.
    """
    array = fcgr(clean_seq(str(fasta.seq)))
    return array2img(array)