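# NOTE(review): stray page text, kept as comments so the module parses -- confirm and remove.
# koke's picture
# app
# 124bf1a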
"From original work: CGR for gene structure"
from typing import Dict, Optional
from collections import namedtuple
# A point (x, y) in the plane, read as the complex number x + iy.
Coord = namedtuple("Coord", "x y")
# A CGR encoding: a count N together with a position (x, y).
CGRCoords = namedtuple("CGRCoords", "N x y")
# Corner of the 2d-plane assigned to each nucleotide.
DEFAULT_COORDS = {
    "A": Coord(1, 1),
    "C": Coord(-1, 1),
    "G": Coord(-1, -1),
    "T": Coord(1, -1),
}
class CGR:
    """Chaos Game Representation (CGR) for DNA.

    Encoding starts at the origin and, for every nucleotide, moves halfway
    towards the corner assigned to that nucleotide.  The result is the triple
    ``(N, x, y)``: how many nucleotides were consumed and the final position.

    Notes:
        * Decoding recovers each nucleotide from the signs of ``(x, y)``, so
          it assumes the corners sit at ``(+-1, +-1)`` as in
          ``DEFAULT_COORDS``; a custom mapping may reassign which nucleotide
          owns which corner but must keep those four corners.
        * Positions are plain floats.  ``decode(*encode(seq))`` is exact only
          while the coordinates are exactly representable (about 50
          nucleotides with IEEE-754 doubles); longer sequences lose their
          leading nucleotides to rounding.
    """

    def __init__(self, coords: Optional[Dict[str, tuple]] = None):
        """Create an encoder positioned at the origin.

        Args:
            coords: mapping from nucleotide symbol to its ``(x, y)`` corner.
                Both plain tuples and ``Coord`` instances are accepted.
                ``DEFAULT_COORDS`` is used when omitted.
        """
        self.nucleotide_coords = DEFAULT_COORDS if coords is None else coords
        self.reset_coords()

    def nucleotide_by_coords(self, x, y) -> str:
        """Return the nucleotide whose corner is exactly ``(x, y)``.

        When several nucleotides share the corner, the first one in the
        mapping's iteration order is returned.

        Raises:
            IndexError: if no nucleotide is mapped to ``(x, y)`` (the
                exception type is kept for backward compatibility).
        """
        target = Coord(x, y)
        # A namedtuple compares equal to a plain tuple with the same values,
        # so custom mappings built from plain tuples match as well.
        for nucleotide, corner in self.nucleotide_coords.items():
            if corner == target:
                return nucleotide
        raise IndexError(
            "no nucleotide is mapped to coordinates ({!r}, {!r})".format(x, y)
        )

    def forward(self, nucleotide: str) -> None:
        """Advance the CGR state by one nucleotide.

        The new position is the midpoint between the current position and the
        corner of ``nucleotide``; the counter ``N`` is incremented.

        Raises:
            ValueError: if ``nucleotide`` has no corner in the mapping.
        """
        corner = self.nucleotide_coords.get(nucleotide)
        if corner is None:
            raise ValueError(
                "unknown nucleotide {!r}; expected one of {!r}".format(
                    nucleotide, list(self.nucleotide_coords)
                )
            )
        # Unpack instead of reading .x/.y so plain (x, y) tuples work too.
        corner_x, corner_y = corner
        current = self.cgr_coords
        self.cgr_coords = CGRCoords(
            current.N + 1,
            (current.x + corner_x) / 2,
            (current.y + corner_y) / 2,
        )

    def backward(self) -> str:
        """Undo the last ``forward`` step and return its nucleotide.

        The nucleotide is inferred from the quadrant of the current position.
        ``N`` is decremented unconditionally; callers are responsible for
        stopping at ``N == 0`` (``decode`` does).
        """
        # Corner that the last step moved towards, and the nucleotide it encodes.
        n_x, n_y = self.coords_current_nucleotide()
        nucleotide = self.nucleotide_by_coords(n_x, n_y)
        # Invert the midpoint rule: previous = 2 * current - corner.
        current = self.cgr_coords
        self.cgr_coords = CGRCoords(
            current.N - 1,
            2 * current.x - n_x,
            2 * current.y - n_y,
        )
        return nucleotide

    def coords_current_nucleotide(self):
        """Return the ``(+-1, +-1)`` corner of the quadrant holding the position.

        A coordinate that is exactly ``0`` is treated as negative.
        """
        x = 1 if self.cgr_coords.x > 0 else -1
        y = 1 if self.cgr_coords.y > 0 else -1
        return x, y

    def encode(self, sequence: str):
        """Encode a DNA sequence and return its ``CGRCoords(N, x, y)``.

        The state is reset first, so an empty sequence yields
        ``CGRCoords(0, 0, 0)``.

        Raises:
            ValueError: if the sequence contains an unmapped nucleotide.
        """
        self.reset_coords()
        for nucleotide in sequence:
            self.forward(nucleotide)
        return self.cgr_coords

    def reset_coords(self) -> None:
        """Move the state back to the starting point ``CGRCoords(0, 0, 0)``."""
        self.cgr_coords = CGRCoords(0, 0, 0)

    def decode(self, N: int, x: float, y: float) -> str:
        """Recover the DNA sequence encoded by ``(N, x, y)``.

        The instance state is overwritten with ``(N, x, y)`` and walked back
        one nucleotide at a time until ``N`` reaches 0.  ``N <= 0`` yields an
        empty string.
        """
        self.cgr_coords = CGRCoords(N, x, y)
        # Nucleotides come out last-to-first while walking backwards.
        recovered = []
        while self.cgr_coords.N > 0:
            recovered.append(self.backward())
        return "".join(reversed(recovered))