Spaces:
Runtime error
Runtime error
| from .cgr import CGR | |
| from itertools import product | |
| from collections import defaultdict | |
| import numpy as np | |
| class FCGR(CGR): | |
| """Frequency matrix CGR | |
| an (2**k x 2**k) 2D representation will be created for a | |
| n-long sequence. | |
| - k represents the k-mer. | |
| - 2**k x 2**k = 4**k the total number of k-mers (sequences of length k) | |
| - pixel value correspond to the value of the frequency for each k-mer | |
| """ | |
| def __init__(self, k: int,): | |
| super().__init__() | |
| self.k = k # k-mer representation | |
| self.kmers = list("".join(kmer) for kmer in product("ACGT", repeat=self.k)) | |
| self.kmer2pixel = self.kmer2pixel_position() | |
| def __call__(self, sequence: str): | |
| "Given a DNA sequence, returns an array with his frequencies in the same order as FCGR" | |
| self.count_kmers(sequence) | |
| # Create an empty array to save the FCGR values | |
| array_size = int(2**self.k) | |
| freq_matrix = np.zeros((array_size,array_size)) | |
| # Assign frequency to each box in the matrix | |
| for kmer, freq in self.freq_kmer.items(): | |
| pos_x, pos_y = self.kmer2pixel[kmer] | |
| freq_matrix[int(pos_x)-1,int(pos_y)-1] = freq | |
| return freq_matrix | |
| def count_kmer(self, kmer): | |
| if "N" not in kmer: | |
| self.freq_kmer[kmer] += 1 | |
| def count_kmers(self, sequence: str): | |
| self.freq_kmer = defaultdict(int) | |
| # representativity of kmers | |
| last_j = len(sequence) - self.k + 1 | |
| kmers = (sequence[i:(i+self.k)] for i in range(last_j)) | |
| # count kmers in a dictionary | |
| list(self.count_kmer(kmer) for kmer in kmers) | |
| def kmer_probabilities(self, sequence: str): | |
| self.probabilities = defaultdict(float) | |
| N=len(sequence) | |
| for key, value in self.freq_kmer.items(): | |
| self.probabilities[key] = float(value) / (N - self.k + 1) | |
| def pixel_position(self, kmer: str): | |
| "Get pixel position in the FCGR matrix for a k-mer" | |
| coords = self.encode(kmer) | |
| N,x,y = coords.N, coords.x, coords.y | |
| # Coordinates from [-1,1]² to [1,2**k]² | |
| np_coords = np.array([(x + 1)/2, (y + 1)/2]) # move coordinates from [-1,1]² to [0,1]² | |
| np_coords *= 2**self.k # rescale coordinates from [0,1]² to [0,2**k]² | |
| x,y = np.ceil(np_coords) # round to upper integer | |
| # Turn coordinates (cx,cy) into pixel (px,py) position | |
| # px = 2**k-cy+1, py = cx | |
| return 2**self.k-int(y)+1, int(x) | |
| def kmer2pixel_position(self,): | |
| kmer2pixel = dict() | |
| for kmer in self.kmers: | |
| kmer2pixel[kmer] = self.pixel_position(kmer) | |
| return kmer2pixel |