Spaces:
No application file
No application file
| #!/usr/bin/env python | |
| # | |
| # Restriction Analysis Libraries. | |
| # Copyright (C) 2004. Frederic Sohm. | |
| # | |
| # This code is part of the Biopython distribution and governed by its | |
| # license. Please see the LICENSE file that should have been included | |
| # as part of this package. | |
| # | |
| r"""Print the results of restriction enzyme analysis. | |
| PrintFormat prints the results from restriction analysis in 3 different | |
| format: list, column or map. | |
| The easiest way to use it is: | |
| >>> from Bio.Restriction.PrintFormat import PrintFormat | |
| >>> from Bio.Restriction.Restriction import RestrictionBatch | |
| >>> from Bio.Seq import Seq | |
| >>> pBs_mcs = Seq('GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC') | |
| >>> restriction_batch = RestrictionBatch(['EcoRI', 'BamHI', 'ApaI']) | |
| >>> result = restriction_batch.search(pBs_mcs) | |
| >>> my_map = PrintFormat() | |
| >>> my_map.print_that(result, 'My pBluescript mcs analysis:\n', | |
| ... 'No site:\n') | |
| My pBluescript mcs analysis: | |
| ApaI : 12. | |
| EcoRI : 50. | |
| No site: | |
| BamHI | |
| <BLANKLINE> | |
| >>> my_map.sequence = pBs_mcs | |
| >>> my_map.print_as("map") | |
| >>> my_map.print_that(result) | |
| 12 ApaI | |
| | | |
| | 50 EcoRI | |
| | | | |
| GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC | |
| |||||||||||||||||||||||||||||||||||||||||||||||||||||| | |
| CCATGGCCCGGGGGGGAGCTCCAGCTGCCATAGCTATTCGAACTATAGCTTAAG | |
| 1 54 | |
| <BLANKLINE> | |
| <BLANKLINE> | |
| Enzymes which do not cut the sequence. | |
| <BLANKLINE> | |
| BamHI | |
| <BLANKLINE> | |
| >>> | |
| Some of the methods of PrintFormat are meant to be overridden by derived | |
| class. | |
| Use the following parameters to control the appearance: | |
| - ConsoleWidth : width of the console used default to 80. | |
| should never be less than 60. | |
| - NameWidth : space attributed to the name in PrintList method. | |
| - Indent : Indent of the second line. | |
| - MaxSize : Maximal size of the sequence (default=6: | |
| -> 99 999 bp + 1 trailing ',' | |
| people are unlikely to ask for restriction map of sequences | |
| bigger than 100.000 bp. This is needed to determine the | |
| space to be reserved for sites location. | |
| - MaxSize = 5 => 9.999 bp | |
| - MaxSize = 6 => 99.999 bp | |
| - MaxSize = 7 => 999.999 bp | |
| Example output:: | |
| <------------ ConsoleWidth ---------------> | |
| <- NameWidth -> | |
| EcoRI : 1, 45, 50, 300, 400, 650, | |
| 700, 1200, 2500. | |
| <--> | |
| Indent | |
| """ # noqa: W291 | |
| import re | |
| class PrintFormat: | |
| """PrintFormat allow the printing of results of restriction analysis.""" | |
| ConsoleWidth = 80 | |
| NameWidth = 10 | |
| MaxSize = 6 | |
| Cmodulo = ConsoleWidth % NameWidth | |
| PrefWidth = ConsoleWidth - Cmodulo | |
| Indent = 4 | |
| linesize = PrefWidth - NameWidth | |
| def print_as(self, what="list"): | |
| """Print the results as specified. | |
| Valid format are: | |
| 'list' -> alphabetical order | |
| 'number' -> number of sites in the sequence | |
| 'map' -> a map representation of the sequence with the sites. | |
| If you want more flexibility over-ride the virtual method make_format. | |
| """ | |
| if what == "map": | |
| self.make_format = self._make_map | |
| elif what == "number": | |
| self.make_format = self._make_number | |
| else: | |
| self.make_format = self._make_list | |
| def format_output(self, dct, title="", s1=""): | |
| """Summarise results as a nicely formatted string. | |
| Arguments: | |
| - dct is a dictionary as returned by a RestrictionBatch.search() | |
| - title is the title of the map. | |
| It must be a formatted string, i.e. you must include the line break. | |
| - s1 is the title separating the list of enzymes that have sites from | |
| those without sites. | |
| - s1 must be a formatted string as well. | |
| The format of print_that is a list. | |
| """ | |
| if not dct: | |
| dct = self.results | |
| ls, nc = [], [] | |
| for k, v in dct.items(): | |
| if v: | |
| ls.append((k, v)) | |
| else: | |
| nc.append(k) | |
| return self.make_format(ls, title, nc, s1) | |
| def print_that(self, dct, title="", s1=""): | |
| """Print the output of the format_output method (OBSOLETE). | |
| Arguments: | |
| - dct is a dictionary as returned by a RestrictionBatch.search() | |
| - title is the title of the map. | |
| It must be a formatted string, i.e. you must include the line break. | |
| - s1 is the title separating the list of enzymes that have sites from | |
| those without sites. | |
| - s1 must be a formatted string as well. | |
| This method prints the output of A.format_output() and it is here | |
| for backwards compatibility. | |
| """ | |
| print(self.format_output(dct, title, s1)) | |
| def make_format(self, cut=(), title="", nc=(), s1=""): | |
| """Virtual method used for formatting results. | |
| Virtual method. | |
| Here to be pointed to one of the _make_* methods. | |
| You can as well create a new method and point make_format to it. | |
| """ | |
| return self._make_list(cut, title, nc, s1) | |
| # _make_* methods to be used with the virtual method make_format | |
| def _make_list(self, ls, title, nc, s1): | |
| """Summarise a list of positions by enzyme (PRIVATE). | |
| Return a string of form:: | |
| title. | |
| enzyme1 : position1, position2. | |
| enzyme2 : position1, position2, position3. | |
| Arguments: | |
| - ls is a tuple or list of cutting enzymes. | |
| - title is the title. | |
| - nc is a tuple or list of non cutting enzymes. | |
| - s1 is the sentence before the non cutting enzymes. | |
| """ | |
| return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1) | |
| def _make_map(self, ls, title, nc, s1): | |
| """Summarise mapping information as a string (PRIVATE). | |
| Return a string of form:: | |
| | title. | |
| | | |
| | enzyme1, position | |
| | | | |
| | AAAAAAAAAAAAAAAAAAAAA... | |
| | ||||||||||||||||||||| | |
| | TTTTTTTTTTTTTTTTTTTTT... | |
| Arguments: | |
| - ls is a list of cutting enzymes. | |
| - title is the title. | |
| - nc is a list of non cutting enzymes. | |
| - s1 is the sentence before the non cutting enzymes. | |
| """ | |
| return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1) | |
| def _make_number(self, ls, title, nc, s1): | |
| """Format cutting position information as a string (PRIVATE). | |
| Returns a string in the form:: | |
| title. | |
| enzyme which cut 1 time: | |
| enzyme1 : position1. | |
| enzyme which cut 2 times: | |
| enzyme2 : position1, position2. | |
| ... | |
| Arguments: | |
| - ls is a list of cutting enzymes. | |
| - title is the title. | |
| - nc is a list of non cutting enzymes. | |
| - s1 is the sentence before the non cutting enzymes. | |
| """ | |
| return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1) | |
| def _make_nocut(self, ls, title, nc, s1): | |
| """Summarise non-cutting enzymes (PRIVATE). | |
| Return a formatted string of the non cutting enzymes. | |
| ls is a list of cutting enzymes -> will not be used. | |
| Here for compatibility with make_format. | |
| Arguments: | |
| - title is the title. | |
| - nc is a list of non cutting enzymes. | |
| - s1 is the sentence before the non cutting enzymes. | |
| """ | |
| return title + self._make_nocut_only(nc, s1) | |
| def _make_nocut_only(self, nc, s1, ls=(), title=""): | |
| """Summarise non-cutting enzymes (PRIVATE). | |
| Return a formatted string of the non cutting enzymes. | |
| Arguments: | |
| - nc is a tuple or list of non cutting enzymes. | |
| - s1 is the sentence before the non cutting enzymes. | |
| """ | |
| if not nc: | |
| return s1 | |
| st = "" | |
| stringsite = s1 or "\n Enzymes which do not cut the sequence.\n\n" | |
| Join = "".join | |
| for key in sorted(nc): | |
| st = Join((st, str.ljust(str(key), self.NameWidth))) | |
| if len(st) > self.linesize: | |
| stringsite = Join((stringsite, st, "\n")) | |
| st = "" | |
| stringsite = Join((stringsite, st, "\n")) | |
| return stringsite | |
| def _make_list_only(self, ls, title, nc=(), s1=""): | |
| """Summarise list of positions per enzyme (PRIVATE). | |
| Return a string of form:: | |
| title. | |
| enzyme1 : position1, position2. | |
| enzyme2 : position1, position2, position3. | |
| ... | |
| Arguments: | |
| - ls is a tuple or list of results. | |
| - title is a string. | |
| - Non cutting enzymes are not included. | |
| """ | |
| if not ls: | |
| return title | |
| return self.__next_section(ls, title) | |
| def _make_number_only(self, ls, title, nc=(), s1=""): | |
| """Summarise number of cuts as a string (PRIVATE). | |
| Return a string of form:: | |
| title. | |
| enzyme which cut 1 time: | |
| enzyme1 : position1. | |
| enzyme which cut 2 times: | |
| enzyme2 : position1, position2. | |
| ... | |
| Arguments: | |
| - ls is a list of results. | |
| - title is a string. | |
| - Non cutting enzymes are not included. | |
| """ | |
| if not ls: | |
| return title | |
| ls.sort(key=lambda x: len(x[1])) | |
| iterator = iter(ls) | |
| cur_len = 1 | |
| new_sect = [] | |
| for name, sites in iterator: | |
| length = len(sites) | |
| if length > cur_len: | |
| title += "\n\nenzymes which cut %i times :\n\n" % cur_len | |
| title = self.__next_section(new_sect, title) | |
| new_sect, cur_len = [(name, sites)], length | |
| continue | |
| new_sect.append((name, sites)) | |
| title += "\n\nenzymes which cut %i times :\n\n" % cur_len | |
| return self.__next_section(new_sect, title) | |
| def _make_map_only(self, ls, title, nc=(), s1=""): | |
| """Make string describing cutting map (PRIVATE). | |
| Return a string of form:: | |
| | title. | |
| | | |
| | enzyme1, position | |
| | | | |
| | AAAAAAAAAAAAAAAAAAAAA... | |
| | ||||||||||||||||||||| | |
| | TTTTTTTTTTTTTTTTTTTTT... | |
| Arguments: | |
| - ls is a list of results. | |
| - title is a string. | |
| - Non cutting enzymes are not included. | |
| """ | |
| if not ls: | |
| return title | |
| resultKeys = sorted(str(x) for x, y in ls) | |
| map = title or "" | |
| enzymemap = {} | |
| for (enzyme, cut) in ls: | |
| for c in cut: | |
| if c in enzymemap: | |
| enzymemap[c].append(str(enzyme)) | |
| else: | |
| enzymemap[c] = [str(enzyme)] | |
| mapping = sorted(enzymemap.keys()) | |
| cutloc = {} | |
| x, counter, length = 0, 0, len(self.sequence) | |
| for x in range(60, length, 60): | |
| counter = x - 60 | |
| loc = [] | |
| cutloc[counter] = loc | |
| remaining = [] | |
| for key in mapping: | |
| if key <= x: | |
| loc.append(key) | |
| else: | |
| remaining.append(key) | |
| mapping = remaining | |
| cutloc[x] = mapping | |
| sequence = str(self.sequence) | |
| revsequence = str( | |
| self.sequence.complement(inplace=False) | |
| ) # TODO: remove inplace=False | |
| a = "|" | |
| base, counter = 0, 0 | |
| emptyline = " " * 60 | |
| Join = "".join | |
| for base in range(60, length, 60): | |
| counter = base - 60 | |
| line = emptyline | |
| for key in cutloc[counter]: | |
| s = "" | |
| if key == base: | |
| for n in enzymemap[key]: | |
| s = " ".join((s, n)) | |
| chunk = line[0:59] | |
| lineo = Join((chunk, str(key), s, "\n")) | |
| line2 = Join((chunk, a, "\n")) | |
| linetot = Join((lineo, line2)) | |
| map = Join((map, linetot)) | |
| break | |
| for n in enzymemap[key]: | |
| s = " ".join((s, n)) | |
| k = key % 60 | |
| lineo = Join((line[0 : (k - 1)], str(key), s, "\n")) | |
| line = Join((line[0 : (k - 1)], a, line[k:])) | |
| line2 = Join((line[0 : (k - 1)], a, line[k:], "\n")) | |
| linetot = Join((lineo, line2)) | |
| map = Join((map, linetot)) | |
| mapunit = "\n".join( | |
| ( | |
| sequence[counter:base], | |
| a * 60, | |
| revsequence[counter:base], | |
| Join( | |
| ( | |
| str.ljust(str(counter + 1), 15), | |
| " " * 30, | |
| str.rjust(str(base), 15), | |
| "\n\n", | |
| ) | |
| ), | |
| ) | |
| ) | |
| map = Join((map, mapunit)) | |
| line = " " * 60 | |
| for key in cutloc[base]: | |
| s = "" | |
| if key == length: | |
| for n in enzymemap[key]: | |
| s = Join((s, " ", n)) | |
| chunk = line[0 : (length - 1)] | |
| lineo = Join((chunk, str(key), s, "\n")) | |
| line2 = Join((chunk, a, "\n")) | |
| linetot = Join((lineo, line2)) | |
| map = Join((map, linetot)) | |
| break | |
| for n in enzymemap[key]: | |
| s = Join((s, " ", n)) | |
| k = key % 60 | |
| lineo = Join((line[0 : (k - 1)], str(key), s, "\n")) | |
| line = Join((line[0 : (k - 1)], a, line[k:])) | |
| line2 = Join((line[0 : (k - 1)], a, line[k:], "\n")) | |
| linetot = Join((lineo, line2)) | |
| map = Join((map, linetot)) | |
| mapunit = "" | |
| mapunit = Join((sequence[base:length], "\n")) | |
| mapunit = Join((mapunit, a * (length - base), "\n")) | |
| mapunit = Join((mapunit, revsequence[base:length], "\n")) | |
| mapunit = Join( | |
| ( | |
| mapunit, | |
| Join( | |
| ( | |
| str.ljust(str(base + 1), 15), | |
| " " * (length - base - 30), | |
| str.rjust(str(length), 15), | |
| "\n\n", | |
| ) | |
| ), | |
| ) | |
| ) | |
| map = Join((map, mapunit)) | |
| return map | |
| # private method to do lists: | |
| def __next_section(self, ls, into): | |
| """Next section (PRIVATE). | |
| Arguments: | |
| - ls is a tuple/list of tuple (string, [int, int]). | |
| - into is a string to which the formatted ls will be added. | |
| Format ls as a string of lines: | |
| The form is:: | |
| enzyme1 : position1. | |
| enzyme2 : position2, position3. | |
| then add the formatted ls to tot | |
| return tot. | |
| """ | |
| indentation = "\n" + (self.NameWidth + self.Indent) * " " | |
| linesize = self.linesize - self.MaxSize | |
| pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize) | |
| several, Join = "", "".join | |
| for name, sites in sorted(ls): | |
| stringsite = "" | |
| output = Join((", ".join(str(site) for site in sites), ".")) | |
| if len(output) > linesize: | |
| # | |
| # cut where appropriate and add the indentation | |
| # | |
| output = [x.group() for x in re.finditer(pat, output)] | |
| stringsite = indentation.join(output) | |
| else: | |
| stringsite = output | |
| into = Join( | |
| (into, str(name).ljust(self.NameWidth), " : ", stringsite, "\n") | |
| ) | |
| return into | |