File size: 1,036 Bytes
c983e7d
124bf1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c983e7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import re
from PIL import Image
import numpy as np


def clean_seq(seq):
    """Normalize a nucleotide sequence to the alphabet A, C, G, T, N.

    The sequence is upper-cased and every character that is not one of
    A, C, G, T or N is replaced by "N". Characters are replaced rather
    than deleted, so the sequence keeps its length and the remaining
    bases keep their positions.

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        The upper-cased sequence containing only A, C, G, T and N.
    """
    # A negated character class also catches non-letter symbols (gaps "-",
    # "*", digits, whitespace) that a per-letter replacement would miss.
    return re.sub(r"[^ACGTN]", "N", seq.upper())

def array2img(array):
    """Convert an FCGR array to an inverted grayscale PIL image.

    The values are min-max rescaled to [0, 1] and then inverted, so the
    smallest value is drawn white (255) and the largest black (0).

    Args:
        array: Numeric NumPy array (or array-like) holding the FCGR values.
            Presumably 2-D, as required by the single-channel "L" mode.

    Returns:
        A ``PIL.Image.Image`` in mode "L" (8-bit grayscale).
    """
    max_color = 255
    values = np.asarray(array, dtype=np.float64)
    m, M = values.min(), values.max()
    span = M - m

    # rescale to [0,1]
    if span > 0:
        img_rescaled = (values - m) / span
    else:
        # Constant input: (array - m) / 0 would yield NaN pixels. Treat every
        # entry as the minimum instead, which renders as a white image.
        img_rescaled = np.zeros_like(values)

    # invert colors black->white
    img_array = np.ceil(max_color - img_rescaled * max_color)
    # Pixel values span 0..255, which needs an unsigned 8-bit type; a signed
    # int8 only holds -128..127 and would overflow for values above 127.
    img_array = img_array.astype(np.uint8)

    # convert to Image
    img_pil = Image.fromarray(img_array, 'L')
    return img_pil

def count_seqs(fasta):
    """Count the lines of a fasta file that contain '>'.

    The result is meant to size a progress bar. A line is counted once if
    '>' occurs anywhere in it, not only at the start.

    Args:
        fasta: Iterable yielding the lines of the fasta file as strings.

    Returns:
        Number of lines containing '>'.
    """
    header_mark = re.compile(">")
    return sum(1 for row in fasta if header_mark.search(row))

def generate_fcgr(kmer, fasta, fcgr):
    """Build the FCGR image for a single fasta record.

    The record's sequence is converted to a string, passed through
    ``clean_seq``, fed to ``fcgr`` and the resulting array is rendered
    with ``array2img``.

    Args:
        kmer: Not used by this function; kept in the signature for callers.
        fasta: Record exposing a ``seq`` attribute (presumably a Biopython
            SeqRecord; confirm against the caller).
        fcgr: Callable that takes the cleaned sequence string and returns
            the array to render.

    Returns:
        The image produced by ``array2img``.
    """
    sequence = clean_seq(str(fasta.seq))
    return array2img(fcgr(sequence))