# koke's picture
# models for 6,7,8,9 mers included
# c983e7d
import re
from PIL import Image
import numpy as np
# Translation table mapping every upper-case ASCII letter other than
# A, C, G, T and N to "N"; built once so each call is a single pass.
_NON_ACGTN_TO_N = str.maketrans(dict.fromkeys("BDEFHIJKLMOPQRSUVWXYZ", "N"))


def clean_seq(seq):
    """Upper-case a sequence and mask letters other than A, C, G, T, N.

    The sequence is upper-cased first; every remaining ASCII letter that is
    not A, C, G, T or N is then replaced by "N". Nothing is removed, so the
    length of the sequence is preserved, and non-letter characters (digits,
    gaps such as "-", whitespace) are left unchanged.

    Parameters
    ----------
    seq : str
        Sequence to clean.

    Returns
    -------
    str
        The upper-cased sequence with the masked letters replaced by "N".
    """
    return seq.upper().translate(_NON_ACGTN_TO_N)
def array2img(array):
    """Convert an FCGR array to a grayscale PIL image.

    The array is min-max rescaled to [0, 1] and the colors are inverted, so
    the largest value maps to 0 (black) and the smallest to 255 (white).

    Parameters
    ----------
    array : numpy.ndarray
        Numeric array to render (assumed 2-D, as required for a mode 'L'
        image -- confirm against the caller).

    Returns
    -------
    PIL.Image.Image
        Image created with mode 'L'.
    """
    max_color = 255
    m, M = array.min(), array.max()
    value_range = M - m
    if value_range == 0:
        # Constant array: (array - m) / 0 would yield NaN pixels. Treat every
        # cell as the minimum instead, which renders as white after inversion.
        img_rescaled = np.zeros_like(array, dtype=float)
    else:
        # rescale to [0,1]
        img_rescaled = (array - m) / value_range
    # invert colors black->white
    img_array = np.ceil(max_color - img_rescaled * max_color)
    # Pixel values span 0..255, which needs an unsigned 8-bit type; int8 only
    # holds -128..127, so values above 127 would be out of range for the cast.
    img_array = img_array.astype(np.uint8)
    # convert to Image
    img_pil = Image.fromarray(img_array, 'L')
    return img_pil
def count_seqs(fasta):
    """Count the lines of ``fasta`` that contain a '>' character.

    Intended to give the number of records for a progress bar. ``fasta`` is
    any iterable of text lines (for example an open file); it is consumed by
    iterating over it once.
    """
    header_marker = ">"
    return sum(1 for record_line in fasta if header_marker in record_line)
def generate_fcgr(kmer, fasta, fcgr):
    """Build the grayscale FCGR image for a single sequence record.

    ``fasta`` must expose a ``seq`` attribute (presumably a Biopython
    record -- confirm against the caller); it is converted to ``str`` and
    cleaned with ``clean_seq``. ``fcgr`` is a callable that receives the
    cleaned sequence and returns the array passed to ``array2img``.
    ``kmer`` is not used in this function.
    """
    sequence = clean_seq(str(fasta.seq))
    return array2img(fcgr(sequence))