Spaces:
Runtime error
Runtime error
| "From original work: CGR for gene structure" | |
| from typing import Dict, Optional | |
| from collections import namedtuple | |
# A point in the 2-D plane, read as the complex number x + iy.
Coord = namedtuple("Coord", ["x", "y"])

# A CGR encoding: N is the number of nucleotides consumed so far and
# (x, y) is the current position in the plane.
CGRCoords = namedtuple("CGRCoords", ["N", "x", "y"])

# Corner of the square assigned to each nucleotide.
DEFAULT_COORDS = dict(
    A=Coord(1, 1),
    C=Coord(-1, 1),
    G=Coord(-1, -1),
    T=Coord(1, -1),
)


class CGR:
    """Chaos Game Representation for DNA.

    A sequence is encoded by starting at (0, 0) and, for every nucleotide,
    moving halfway towards the corner assigned to that nucleotide.  The
    resulting ``CGRCoords(N, x, y)`` can be decoded again by walking the
    same path backwards.

    NOTE: coordinates are ordinary floats (each step divides by 2), so a
    round trip through ``encode``/``decode`` can only be exact while the
    coordinates remain exactly representable -- presumably on the order
    of 50 nucleotides for IEEE doubles; verify before relying on it for
    longer sequences.
    """

    def __init__(self, coords: Optional[Dict[str, tuple]] = None):
        """Create an encoder.

        Args:
            coords: optional mapping ``nucleotide -> (x, y)``.  Values may be
                ``Coord`` instances or any 2-item iterable; when omitted,
                ``DEFAULT_COORDS`` is used.
        """
        source = DEFAULT_COORDS if coords is None else coords
        # Normalise every entry to ``Coord`` so ``.x``/``.y`` work even when
        # the caller supplies plain tuples, and copy the mapping so that
        # instances never share (or mutate) the module-level default.
        self.nucleotide_coords = {
            nucleotide: Coord(*corner) for nucleotide, corner in source.items()
        }
        self.cgr_coords = CGRCoords(0, 0, 0)

    def nucleotide_by_coords(self, x, y):
        """Return the first nucleotide whose corner equals ``(x, y)``.

        Raises:
            IndexError: if no nucleotide is mapped to ``(x, y)`` (same
                exception type as before, now with an explicit message).
        """
        target = Coord(x, y)
        for nucleotide, corner in self.nucleotide_coords.items():
            if corner == target:
                return nucleotide
        raise IndexError(
            "no nucleotide is mapped to coordinates ({}, {})".format(x, y)
        )

    def forward(self, nucleotide: str):
        """Advance the CGR position by one nucleotide.

        The new position is the midpoint between the current position and
        the nucleotide's corner; ``N`` is incremented by one.

        Raises:
            ValueError: if ``nucleotide`` has no assigned coordinates.
        """
        corner = self.nucleotide_coords.get(nucleotide)
        if corner is None:
            raise ValueError(
                "unknown nucleotide {!r}; expected one of {}".format(
                    nucleotide, sorted(self.nucleotide_coords)
                )
            )
        x = (self.cgr_coords.x + corner.x) / 2
        y = (self.cgr_coords.y + corner.y) / 2
        # update cgr_coords
        self.cgr_coords = CGRCoords(self.cgr_coords.N + 1, x, y)

    def backward(self):
        """Undo the last ``forward`` step and return its nucleotide.

        The nucleotide is inferred from the quadrant of the current
        position; the position is then moved back to the previous point
        and ``N`` is decremented by one.
        """
        # corner (and therefore nucleotide) of the most recent step
        n_x, n_y = self.coords_current_nucleotide()
        nucleotide = self.nucleotide_by_coords(n_x, n_y)
        # invert the midpoint rule: previous = 2 * current - corner
        x = 2 * self.cgr_coords.x - n_x
        y = 2 * self.cgr_coords.y - n_y
        # update cgr_coords
        self.cgr_coords = CGRCoords(self.cgr_coords.N - 1, x, y)
        return nucleotide

    def coords_current_nucleotide(self):
        """Return the corner ``(x, y)`` of the quadrant holding the current point.

        Each component is ``1`` when the corresponding coordinate is
        strictly positive and ``-1`` otherwise, so this only matches
        mappings whose corners sit at (+/-1, +/-1), as ``DEFAULT_COORDS`` does.
        """
        x = 1 if self.cgr_coords.x > 0 else -1
        y = 1 if self.cgr_coords.y > 0 else -1
        return x, y

    def encode(self, sequence: str) -> CGRCoords:
        """Encode a DNA sequence and return the final ``CGRCoords``.

        The internal position is reset first, so an empty sequence yields
        ``CGRCoords(0, 0, 0)``.
        """
        # reset starting position to (0,0,0)
        self.reset_coords()
        for nucleotide in sequence:
            self.forward(nucleotide)
        return self.cgr_coords

    def reset_coords(self):
        """Reset the internal position to the origin with ``N == 0``."""
        self.cgr_coords = CGRCoords(0, 0, 0)

    def decode(self, N: int, x: float, y: float) -> str:
        """Recover the DNA sequence encoded by ``(N, x, y)``.

        Steps backwards ``N`` times; the internal position is overwritten
        with the given encoding and ends at ``N == 0``.
        """
        self.cgr_coords = CGRCoords(N, x, y)
        # nucleotides are recovered last-to-first
        sequence = []
        while self.cgr_coords.N > 0:
            sequence.append(self.backward())
        return "".join(sequence[::-1])