Spaces:
No application file
No application file
| #!/usr/bin/env python | |
| # | |
| # Restriction Analysis Libraries. | |
| # Copyright (C) 2004. Frederic Sohm. | |
| # | |
| # This code is part of the Biopython distribution and governed by its | |
| # license. Please see the LICENSE file that should have been included | |
| # as part of this package. | |
| # | |
| """Restriction Enzyme classes. | |
| Notes about the various classes of the restriction enzyme implementation:: | |
| RestrictionType is the type of all restriction enzymes. | |
| ----------------------------------------------------------------------- | |
| AbstractCut implements some methods that are common to all enzymes. | |
| ----------------------------------------------------------------------- | |
| NoCut, OneCut,TwoCuts represent the number of double strand cuts | |
| produced by the enzyme. | |
| they correspond to the 4th field of the | |
| rebase record emboss_e.NNN. | |
| 0->NoCut : the enzyme is not characterised. | |
| 2->OneCut : the enzyme produces one double strand cut. | |
| 4->TwoCuts : two double strand cuts. | |
| ----------------------------------------------------------------------- | |
| Meth_Dep, Meth_Undep represent the methylation susceptibility to | |
| the enzyme. | |
| Not implemented yet. | |
| ----------------------------------------------------------------------- | |
| Palindromic, if the site is palindromic or not. | |
| NonPalindromic allows some optimisations of the code. | |
| No need to check the reverse strand | |
| with palindromic sites. | |
| ----------------------------------------------------------------------- | |
| Unknown, Blunt, represent the overhang. | |
| Ov5, Ov3 Unknown is here for symmetry reasons and | |
| correspond to enzymes that are not | |
| characterised in rebase. | |
| ----------------------------------------------------------------------- | |
| Defined, Ambiguous, represent the sequence of the overhang. | |
| NotDefined | |
| NotDefined is for enzymes not characterised | |
| in rebase. | |
| Defined correspond to enzymes that display | |
| a constant overhang whatever the sequence. | |
| ex : EcoRI. G^AATTC -> overhang :AATT | |
| CTTAA^G | |
| Ambiguous : the overhang varies with the | |
| sequence restricted. | |
| Typically enzymes which cut outside their | |
| restriction site or (but not always) | |
| inside an ambiguous site. | |
| ex: | |
| AcuI CTGAAG(22/20) -> overhang : NN | |
| AasI GACNNN^NNNGTC -> overhang : NN | |
| CTGN^NNNNNCAG | |
| note : these 3 classes refer to the overhang not the site. | |
| So the enzyme ApoI (RAATTY) is defined even if its | |
| restriction site is ambiguous. | |
| ApoI R^AATTY -> overhang : AATT -> Defined | |
| YTTAA^R | |
| Accordingly, blunt enzymes are always Defined even | |
| when they cut outside their restriction site. | |
| ----------------------------------------------------------------------- | |
| Not_available, as found in rebase file emboss_r.NNN files. | |
| Commercially_available | |
| allow the selection of the enzymes | |
| according to their suppliers to reduce the | |
| quantity of results. | |
| Also will allow the implementation of | |
| buffer compatibility tables. Not | |
| implemented yet. | |
| the list of suppliers is extracted from | |
| emboss_s.NNN | |
| ----------------------------------------------------------------------- | |
| """ | |
| import warnings | |
| import re | |
| import string | |
| import itertools | |
| from Bio.Seq import Seq, MutableSeq | |
| from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict | |
| from Bio.Restriction.Restriction_Dictionary import typedict | |
| from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict | |
| from Bio.Restriction.PrintFormat import PrintFormat | |
| from Bio import BiopythonWarning | |
# IUPAC nucleotide compatibility table.
# Each key is an IUPAC nucleotide code; the value is the string of all codes
# that can stand for at least one of the same bases as the key.
matching = dict(
    # Unambiguous bases.
    A="ARWMHVDN",
    C="CYSMHBVN",
    G="GRSKBVDN",
    T="TYWKHBDN",
    # Two-base ambiguity codes.
    R="ABDGHKMNSRWV",
    Y="CBDHKMNSTWVY",
    W="ABDHKMNRTWVY",
    S="CBDGHKMNSRVY",
    M="ACBDHMNSRWVY",
    K="BDGHKNSRTWVY",
    # Three-base ambiguity codes.
    H="ACBDHKMNSRTWVY",
    B="CBDGHKMNSRTWVY",
    V="ACBDGHKMNSRWVY",
    D="ABDGHKMNSRTWVY",
    # Any base.
    N="ACBDGHKMNSRTWVY",
)
# Module-level alias: ``DNA`` is another name for the imported ``Seq`` class.
DNA = Seq
class FormattedSeq:
    """A linear or circular sequence object for restriction analysis.

    Translates a Bio.Seq into a formatted sequence to be used with Restriction.

    Roughly: remove anything which is not IUPAC alphabet and then add a space
    in front of the sequence to get a biological index instead of a
    python index (i.e. index of the first base is 1 not 0).

    Retains information about the shape of the molecule: linear (default) or
    circular. Restriction sites are searched over the edges of a circular
    sequence.

    Instance attributes set by ``__init__``:

    - ``data``   : str, a leading space followed by the upper-case sequence.
    - ``linear`` : bool, True for a linear molecule, False for a circular one.
    - ``lower``  : bool, result of ``seq.islower()`` on the input sequence.
    - ``klass``  : class of the input sequence, used to build slices.
    """

    # Bytes that are silently dropped from the input: whitespace and digits.
    _remove_chars = (string.whitespace + string.digits).encode()

    # Translation table: IUPAC letters (either case) map to their upper-case
    # byte, every other byte maps to 0 so invalid input can be detected.
    _table = bytearray(256)
    _case_offset = ord("a") - ord("A")
    for _code in b"ABCDGHKMNRSTVWY":  # Only allow IUPAC letters
        _table[_code] = _code  # map uppercase to uppercase
        _table[_code + _case_offset] = _code  # map lowercase to uppercase
    # Remove the scratch names so they do not linger as class attributes.
    del _code, _case_offset
    _table = bytes(_table)

    def __init__(self, seq, linear=True):
        """Initialize ``FormattedSeq`` with sequence and topology (optional).

        ``seq`` is either a ``Bio.Seq``, ``Bio.MutableSeq`` or a
        ``FormattedSeq``. If ``seq`` is a ``FormattedSeq``, ``linear``
        will have no effect on the shape of the sequence.

        Raises TypeError if ``seq`` is of another type, or if it contains a
        character that is neither an IUPAC letter, whitespace nor a digit.
        """
        if isinstance(seq, FormattedSeq):
            # Copy constructor: take everything, including the topology,
            # from the source object.
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.klass = seq.klass
            return
        if not isinstance(seq, (Seq, MutableSeq)):
            raise TypeError(f"expected Seq or MutableSeq, got {type(seq)}")
        self.lower = seq.islower()
        data = bytes(seq)
        cleaned = data.translate(self._table, delete=self._remove_chars)
        if 0 in cleaned:  # Check if all letters were IUPAC
            raise TypeError(f"Invalid character found in {data.decode()}")
        # Note this adds a leading space to the sequence (!)
        self.data = " " + cleaned.decode("ASCII")
        self.linear = linear
        self.klass = seq.__class__

    def __len__(self):
        """Return length of ``FormattedSeq``.

        ``FormattedSeq`` has a leading space, thus subtract 1.
        """
        return len(self.data) - 1

    def __repr__(self):
        """Represent ``FormattedSeq`` class as a string."""
        return f"FormattedSeq({self[1:]!r}, linear={self.linear!r})"

    def __eq__(self, other):
        """Implement equality operator for ``FormattedSeq`` object.

        Two objects are equal when both are ``FormattedSeq`` instances with
        identical ``repr()`` (same sequence slice and same topology).
        """
        return isinstance(other, FormattedSeq) and repr(self) == repr(other)

    def circularise(self):
        """Circularise sequence in place."""
        self.linear = False

    def linearise(self):
        """Linearise sequence in place."""
        self.linear = True

    def to_linear(self):
        """Make a new instance of sequence as linear."""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """Make a new instance of sequence as circular."""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """Return if sequence is linear (True) or circular (False)."""
        return self.linear

    def finditer(self, pattern, size):
        """Return a list of a given pattern which occurs in the sequence.

        The list is made of tuple (location, pattern.group).
        The latter is used with non palindromic sites.
        Pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        Size is the size of the restriction enzyme recognition-site size.
        """
        data = self.data
        if not self.is_linear():
            # Append the first ``size - 1`` bases so that sites spanning the
            # origin of a circular sequence can be matched.
            data = data + data[1:size]
        return [(hit.start(), hit.group) for hit in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Return substring of ``FormattedSeq``.

        The class of the returned object is the class of the respective
        sequence. Note that due to the leading space, indexing is 1-based:

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction.Restriction import FormattedSeq
        >>> f_seq = FormattedSeq(Seq('ATGCATGC'))
        >>> f_seq[1]
        Seq('A')

        """
        chunk = self.data[i]
        if self.lower:
            # The input was lower case: hand back lower-case fragments.
            chunk = chunk.lower()
        return self.klass(chunk)
class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes are derived.

    Implement the operator methods.

    Ordering (``<``, ``<=``, ``>``, ``>=``) compares the length of the
    recognition site first (``len(cls)``) and, for equal lengths, the
    enzyme names alphabetically.
    """

    def __init__(cls, name="", bases=(), dct=None):
        """Initialize RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        Raises ValueError if ``name`` contains a hyphen, or if the
        ``compsite`` attribute cannot be compiled as a regular expression.
        """
        if "-" in name:
            raise ValueError(f"Problem with hyphen in {name!r} as enzyme name")
        # 2011/11/26 - Nobody knows what this call was supposed to accomplish,
        # but all unit tests seem to pass without it.
        # super().__init__(cls, name, bases, dct)
        try:
            cls.compsite = re.compile(cls.compsite)
        except AttributeError:
            # Can happen if initialised wrongly.
            # (This was seen when Sphinx api-doc imports the classes, and
            # tried to automatically general documentation for them)
            pass
        except Exception:
            raise ValueError(
                f"Problem with regular expression, re.compiled({cls.compsite!r})"
            ) from None

    def __add__(cls, other):
        """Add restriction enzyme to a RestrictionBatch().

        If other is an enzyme returns a batch of the two enzymes.
        If other is already a RestrictionBatch add enzyme to it.

        Raises TypeError for any other operand type.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        if isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        raise TypeError(
            f"expected RestrictionType or RestrictionBatch, got {type(other)} instead"
        )

    def __truediv__(cls, other):
        """Override '/' operator to use as search method.

        >>> from Bio.Restriction import EcoRI
        >>> EcoRI/Seq('GAATTC')
        [2]

        Returns RE.search(other).
        """
        return cls.search(other)

    def __rtruediv__(cls, other):
        """Override division with reversed operands to use as search method.

        >>> from Bio.Restriction import EcoRI
        >>> Seq('GAATTC')/EcoRI
        [2]

        Returns RE.search(other).
        """
        return cls.search(other)

    def __floordiv__(cls, other):
        """Override '//' operator to use as catalyse method.

        >>> from Bio.Restriction import EcoRI
        >>> EcoRI//Seq('GAATTC')
        (Seq('G'), Seq('AATTC'))

        Returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """As __floordiv__, with reversed operands.

        >>> from Bio.Restriction import EcoRI
        >>> Seq('GAATTC')//EcoRI
        (Seq('G'), Seq('AATTC'))

        Returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __str__(cls):
        """Return the name of the enzyme as string."""
        return cls.__name__

    def __repr__(cls):
        """Implement repr method.

        Used with eval or exec will instantiate the enzyme.
        """
        return f"{cls.__name__}"

    def __len__(cls):
        """Return length of recognition site of enzyme as int.

        Returns 0 when the class has no ``size`` attribute.
        """
        try:
            return cls.size
        except AttributeError:
            # Happens if the instance was not initialised as expected.
            # e.g. if instance created by a documentation framework
            # like Sphinx trying to inspect the class automatically,
            # Also seen within IPython.
            return 0

    def __hash__(cls):
        """Implement ``hash()`` method for ``RestrictionType``.

        Python default is to use ``id(...)``
        This is consistent with the ``__eq__`` implementation
        """
        return id(cls)

    def __eq__(cls, other):
        """Override '==' operator.

        True if RE and other are the same enzyme.

        Specifically this checks they are the same Python object.
        """
        # assert (id(cls)==id(other)) == (other is cls) == (cls is other)
        return id(cls) == id(other)

    def __ne__(cls, other):
        """Override '!=' operator.

        Isoschizomer strict (same recognition site, same restriction) -> False
        All the other-> True

        WARNING - This is not the inverse of the __eq__ method

        >>> from Bio.Restriction import SacI, SstI
        >>> SacI != SstI  # true isoschizomers
        False
        >>> SacI == SstI
        False
        """
        if not isinstance(other, RestrictionType):
            return True
        return not (cls.charac == other.charac)

    def __rshift__(cls, other):
        """Override '>>' operator to test for neoschizomers.

        neoschizomer : same recognition site, different restriction. -> True
        all the others :                                             -> False

        >>> from Bio.Restriction import SmaI, XmaI
        >>> SmaI >> XmaI
        True
        """
        if not isinstance(other, RestrictionType):
            return False
        return bool(cls.site == other.site and cls.charac != other.charac)

    def __mod__(cls, other):
        """Override '%' operator to test for compatible overhangs.

        True if a and b have compatible overhang.

        >>> from Bio.Restriction import XhoI, SalI
        >>> XhoI % SalI
        True

        Raises TypeError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise TypeError(f"expected RestrictionType, got {type(other)} instead")
        return cls._mod1(other)

    def __ge__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>='. a is greater or equal than b if the a site is longer
        than b site. If their site have the same length sort by alphabetical
        order of their names.

        >>> from Bio.Restriction import EcoRI, EcoRV
        >>> EcoRI.size
        6
        >>> EcoRV.size
        6
        >>> EcoRI >= EcoRV
        False

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) > len(other):
            return True
        # Use len() on both sides (as '<=' and '<' do) so that classes
        # without a ``size`` attribute compare as length 0 instead of failing.
        return len(cls) == len(other) and cls.__name__ >= other.__name__

    def __gt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>'. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) > len(other):
            return True
        # Same len()-based tie-break as the other ordering operators.
        return len(cls) == len(other) and cls.__name__ > other.__name__

    def __le__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<='. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) < len(other):
            return True
        return len(cls) == len(other) and cls.__name__ <= other.__name__

    def __lt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<'. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) < len(other):
            return True
        return len(cls) == len(other) and cls.__name__ < other.__name__
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the docstring above says every method is a classmethod,
    # but no ``@classmethod`` decorators are visible in this copy of the
    # file -- confirm against the upstream source before relying on either.

    def search(cls, dna, linear=True):
        """Return a list of cutting sites of the enzyme in the sequence.

        Compensate for circular sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
        A ``FormattedSeq`` is also accepted and is used as it is (``linear``
        is then ignored).

        If linear is False, the restriction sites that span over the boundaries
        will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.
        """
        #
        #   Separating search from _search allow a (very limited) optimisation
        #   of the search when using a batch of restriction enzymes.
        #   in this case the DNA is tested once by the class which implements
        #   the batch instead of being tested by each enzyme single.
        #   see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        # The formatted sequence is stored on the enzyme class; ``_search``
        # reads it from there.
        cls.dna = dna
        return cls._search()

    def all_suppliers(cls):
        """Print all the suppliers of restriction enzyme."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        print(",\n".join(supply))

    def is_equischizomer(cls, other):
        """Test for real isoschizomer.

        True if other is an isoschizomer of RE, but not an neoschizomer,
        else False.

        Equischizomer: same site, same position of restriction.

        >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
        >>> SacI.is_equischizomer(SstI)
        True
        >>> SmaI.is_equischizomer(XmaI)
        False

        """
        # ``!=`` is overridden on enzymes and is False only for
        # equischizomers, hence the double negative.
        return not cls != other

    def is_neoschizomer(cls, other):
        """Test for neoschizomer.

        True if other is a neoschizomer of RE, else False.

        Neoschizomer: same site, different position of restriction.
        """
        return cls >> other

    def is_isoschizomer(cls, other):
        """Test for same recognition site.

        True if other has the same recognition site, else False.

        Isoschizomer: same site.

        >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
        >>> SacI.is_isoschizomer(SstI)
        True
        >>> SmaI.is_isoschizomer(XmaI)
        True

        """
        return (not cls != other) or cls >> other

    def equischizomers(cls, batch=None):
        """List equischizomers of the enzyme.

        Return a sorted list of all the equischizomers of RE, the enzyme
        itself excluded.

        If batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).

        Equischizomer: same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        found = [x for x in batch if not cls != x]
        # The enzyme matches itself; drop it, but tolerate a batch that
        # does not contain it instead of raising ValueError.
        if cls in found:
            found.remove(cls)
        found.sort()
        return found

    def neoschizomers(cls, batch=None):
        """List neoschizomers of the enzyme.

        Return a sorted list of all the neoschizomers of RE.

        If batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).

        Neoschizomer: same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        return sorted(x for x in batch if cls >> x)

    def isoschizomers(cls, batch=None):
        """List all isoschizomers of the enzyme.

        Return a sorted list of all the equischizomers and neoschizomers of
        RE, the enzyme itself excluded.

        If batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).
        """
        if not batch:
            batch = AllEnzymes
        found = [x for x in batch if (cls >> x) or (not cls != x)]
        # Same self-exclusion as in equischizomers(): safe when the enzyme
        # is absent from the supplied batch.
        if cls in found:
            found.remove(cls)
        found.sort()
        return found

    def frequency(cls):
        """Return the theoretically cutting frequency of the enzyme.

        Frequency of the site, given as 'one cut per x bases' (int).
        """
        return cls.freq
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the start
    of the restriction site.

    Their catalyse() method cannot be used (the ``Unknown`` base class in
    this module raises NotImplementedError).

    Unknown and NotDefined are also part of the base classes of these enzymes.

    Internal use only. Not meant to be instantiated.
    """

    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always False here: True would mean the enzyme cuts the sequence one
        time on each strand.
        """
        return False

    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always False here: True would mean the enzyme cuts the sequence
        twice on each strand.
        """
        return False

    def _modify(cls, location):
        """Return a generator over the reported position (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.

        As the cut position is not known for these enzymes, the match
        location itself is yielded unchanged.
        """
        yield from (location,)

    def _rev_modify(cls, location):
        """Return a generator over the reported position (PRIVATE).

        For internal use only.

        As _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic: the location is yielded unchanged.
        """
        yield from (location,)

    def characteristic(cls):
        """Return the enzyme's characteristics as tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut (current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd3 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        For these enzymes the four cut positions are all None.
        """
        return (None, None, None, None, cls.site)
class OneCut(AbstractCut):
    """Implement the methods for enzymes that cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always True here: the enzyme cuts the sequence one time on each
        strand.
        """
        return True

    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always False here: True would mean the enzyme cuts the sequence
        twice on each strand.
        """
        return False

    def _modify(cls, location):
        """Return a generator over the real cutting position (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.
        _modify yields the real place where the enzyme will cut, computed as
        ``location + cls.fst5``.

        Example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        """
        cut = location + cls.fst5
        yield cut

    def _rev_modify(cls, location):
        """Return a generator over the real cutting position (PRIVATE).

        For internal use only.

        As _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic; yields ``location - cls.fst3``.
        """
        cut = location - cls.fst3
        yield cut

    def characteristic(cls):
        """Return the enzyme's characteristics as tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut (current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd3 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        For these enzymes the two "second cut" entries are None.
        """
        return (cls.fst5, cls.fst3, None, None, cls.site)
class TwoCuts(AbstractCut):
    """Implement the methods for enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always False here: True would mean the enzyme cuts the sequence one
        time on each strand.
        """
        return False

    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always True here: the enzyme cuts the sequence twice on each strand.
        """
        return True

    def _modify(cls, location):
        """Return a generator over the real cutting positions (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.
        _modify yields the two places where the enzyme will cut:
        ``location + cls.fst5`` first, then ``location + cls.scd5``.

        Example for a single cutter (see ``OneCut._modify``)::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        An enzyme that cuts twice yields two integers, one per cutting site.
        """
        first_cut = location + cls.fst5
        yield first_cut
        second_cut = location + cls.scd5
        yield second_cut

    def _rev_modify(cls, location):
        """Return a generator over the real cutting positions (PRIVATE).

        For internal use only.

        As _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic; yields ``location - cls.fst3`` and then
        ``location - cls.scd3``.
        """
        first_cut = location - cls.fst3
        yield first_cut
        second_cut = location - cls.scd3
        yield second_cut

    def characteristic(cls):
        """Return the enzyme's characteristics as tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut (current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd3 -> second 3' cut (complementary strand) or None
        - site -> recognition site.
        """
        return (cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site)
class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable.
    """

    def is_methylable(cls):
        """Return if recognition site can be methylated.

        Always True for this class: the recognition site is methylable.
        """
        return True
class Meth_Undep(AbstractCut):
    """Implement information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation.
    """

    def is_methylable(cls):
        """Return if recognition site can be methylated.

        Always False for this class: True would mean the recognition site
        is methylable.
        """
        return False
class Palindromic(AbstractCut):
    """Implement methods for enzymes with palindromic recognition sites.

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated.
    """

    def _search(cls):
        """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).

        For internal use only.

        Implement the search method for palindromic enzymes: every match of
        ``cls.compsite`` in ``cls.dna`` is converted to cut position(s) with
        ``cls._modify``. The list is stored in ``cls.results``, and
        ``cls._drop()`` is called when it is not empty.
        """
        cuts = []
        # Only the match start is needed; the group accessor is unused for
        # palindromic sites.
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            cls._drop()
        # Read the attribute again: _drop() works on cls.results.
        return cls.results

    def is_palindromic(cls):
        """Return if the enzyme has a palindromic recognition site."""
        return True
class NonPalindromic(AbstractCut):
    """Implement methods for enzymes with non-palindromic recognition sites.

    Palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated.
    """

    def _search(cls):
        """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).

        For internal use only.

        Implement the search method for non palindromic enzymes. Each match
        is handled by ``cls._modify`` when the regex group named after the
        enzyme matched, and by ``cls._rev_modify`` otherwise. Positions from
        the second case are also kept in ``cls.on_minus``. The combined,
        sorted list is stored in ``cls.results``; ``cls._drop()`` is called
        when it is not empty.
        """
        matches = cls.dna.finditer(cls.compsite, cls.size)
        forward_cuts = []
        cls.results = forward_cuts
        to_forward = cls._modify
        to_reverse = cls._rev_modify
        enzyme_name = str(cls)
        reverse_cuts = []
        cls.on_minus = reverse_cuts
        for start, group in matches:
            # ``group`` is the bound ``Match.group`` method; a truthy value
            # for the enzyme-named group selects the forward handling.
            if group(enzyme_name):
                forward_cuts.extend(to_forward(start))
            else:
                reverse_cuts.extend(to_reverse(start))
        forward_cuts.extend(reverse_cuts)
        if forward_cuts:
            forward_cuts.sort()
            cls._drop()
        # Read the attribute again: _drop() works on cls.results.
        return cls.results

    def is_palindromic(cls):
        """Return if the enzyme has a palindromic recognition site."""
        return False
class Unknown(AbstractCut):
    """Implement methods for enzymes that produce unknown overhangs.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        For enzymes with an unknown restriction pattern no fragments can be
        computed: this method always raises NotImplementedError.

        The signature matches the other overhang classes: dna must be a
        Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance, and linear
        tells whether the sequence is linear (True) or circular (False).
        """
        message = f"{cls.__name__} restriction is unknown."
        raise NotImplementedError(message)

    # American spelling alias.
    catalyze = catalyse

    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False here: True would mean the enzyme produces blunt end.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False here: True would mean the enzyme produces 5' overhang
        sticky end.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False here: True would mean the enzyme produces 3' overhang
        sticky end.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always returns "unknown".
        """
        return "unknown"

    def compatible_end(cls):
        """List all enzymes that produce compatible ends for the enzyme.

        Always an empty list for enzymes with unknown overhangs.
        """
        return list()

    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Always False for enzymes with unknown overhangs.
        """
        return False
| class Blunt(AbstractCut): | |
| """Implement methods for enzymes that produce blunt ends. | |
| The enzyme cuts the + strand and the - strand of the DNA at the same | |
| place. | |
| Internal use only. Not meant to be instantiated. | |
| """ | |
def catalyse(cls, dna, linear=True):
    """List the sequence fragments after cutting dna with enzyme.

    Return a tuple of dna as will be produced by using RE to restrict the
    dna.

    dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

    If linear is False, the sequence is considered to be circular and the
    output will be modified accordingly: the first fragment then bridges
    the last cutting site to the first one.
    """
    cuts = cls.search(dna, linear)
    # search() stores the formatted (1-based) sequence on the class.
    seq = cls.dna
    if not cuts:
        # No site: the whole sequence is the single fragment.
        return (seq[1:],)
    if seq.is_linear():
        # START -> first site, site -> next site ..., last site -> END.
        return (
            seq[1 : cuts[0]],
            *(seq[a:b] for a, b in zip(cuts, cuts[1:])),
            seq[cuts[-1] :],
        )
    # Circular: bridge LAST site to FIRST site, then add the others.
    return (
        seq[cuts[-1] :] + seq[1 : cuts[0]],
        *(seq[a:b] for a, b in zip(cuts, cuts[1:])),
    )


# American spelling alias.
catalyze = catalyse
| def is_blunt(cls): | |
| """Return if the enzyme produces blunt ends. | |
| True if the enzyme produces blunt end. | |
| Related methods: | |
| - RE.is_3overhang() | |
| - RE.is_5overhang() | |
| - RE.is_unknown() | |
| """ | |
| return True | |
| def is_5overhang(cls): | |
| """Return if the enzymes produces 5' overhanging ends. | |
| True if the enzyme produces 5' overhang sticky end. | |
| Related methods: | |
| - RE.is_3overhang() | |
| - RE.is_blunt() | |
| - RE.is_unknown() | |
| """ | |
| return False | |
| def is_3overhang(cls): | |
| """Return if the enzyme produces 3' overhanging ends. | |
| True if the enzyme produces 3' overhang sticky end. | |
| Related methods: | |
| - RE.is_5overhang() | |
| - RE.is_blunt() | |
| - RE.is_unknown() | |
| """ | |
| return False | |
| def overhang(cls): | |
| """Return the type of the enzyme's overhang as string. | |
| Can be "3' overhang", "5' overhang", "blunt", "unknown". | |
| """ | |
| return "blunt" | |
| def compatible_end(cls, batch=None): | |
| """List all enzymes that produce compatible ends for the enzyme.""" | |
| if not batch: | |
| batch = AllEnzymes | |
| r = sorted(x for x in iter(AllEnzymes) if x.is_blunt()) | |
| return r | |
| def _mod1(other): | |
| """Test if other enzyme produces compatible ends for enzyme (PRIVATE). | |
| For internal use only | |
| Test for the compatibility of restriction ending of RE and other. | |
| """ | |
| return issubclass(other, Blunt) | |
class Ov5(AbstractCut):
    """Implement methods for enzymes that produce 5' overhanging ends.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the methods below take ``cls`` but no binding decorator is
    # visible in this view; presumably they are classmethods -- confirm.

    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # presumably ``search`` leaves the formatted sequence on ``cls.dna``
        # -- verify against its definition.
        seq = cls.dna
        if not cuts:
            # No site: the whole sequence comes back as a single fragment.
            return (seq[1:],)
        # Fragments delimited by two consecutive cut positions (empty when
        # there is only one site).
        between = [seq[cuts[i] : cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            # START -> first site, site -> site, last site -> END.
            fragments = [seq[1 : cuts[0]], *between, seq[cuts[-1] :]]
        else:
            # Circular: bridge the LAST site to the FIRST site, then the rest.
            fragments = [seq[cuts[-1] :] + seq[1 : cuts[0]], *between]
        return tuple(fragments)

    catalyze = catalyse

    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        True if the enzyme produces blunt end; always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        True if the enzyme produces 5' overhang sticky end; always True for
        this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return True

    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        True if the enzyme produces 3' overhang sticky end; always False for
        this class.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        """
        return "5' overhang"

    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        ``batch`` is the collection of enzymes to screen. When it is omitted
        (or empty) ``AllEnzymes`` is screened instead. The result is a sorted
        list of the 5' overhang enzymes ``x`` for which ``x % cls`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Screen the requested batch; the previous code always iterated over
        # AllEnzymes, which silently ignored the ``batch`` argument.
        return sorted(x for x in iter(batch) if x.is_5overhang() and x % cls)

    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Only another Ov5 subclass can be compatible; the overhang comparison
        itself is delegated to ``cls._mod2``.
        """
        if issubclass(other, Ov5):
            return cls._mod2(other)
        return False
class Ov3(AbstractCut):
    """Implement methods for enzymes that produce 3' overhanging ends.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the methods below take ``cls`` but no binding decorator is
    # visible in this view; presumably they are classmethods -- confirm.

    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # presumably ``search`` leaves the formatted sequence on ``cls.dna``
        # -- verify against its definition.
        seq = cls.dna
        if not cuts:
            # No site: the whole sequence comes back as a single fragment.
            return (seq[1:],)
        # Fragments delimited by two consecutive cut positions (empty when
        # there is only one site).
        between = [seq[cuts[i] : cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            # START -> first site, site -> site, last site -> END.
            fragments = [seq[1 : cuts[0]], *between, seq[cuts[-1] :]]
        else:
            # Circular: bridge the LAST site to the FIRST site, then the rest.
            fragments = [seq[cuts[-1] :] + seq[1 : cuts[0]], *between]
        return tuple(fragments)

    catalyze = catalyse

    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        True if the enzyme produces blunt end; always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        True if the enzyme produces 5' overhang sticky end; always False for
        this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        True if the enzyme produces 3' overhang sticky end; always True for
        this class.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return True

    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        """
        return "3' overhang"

    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        ``batch`` is the collection of enzymes to screen. When it is omitted
        (or empty) ``AllEnzymes`` is screened instead. The result is a sorted
        list of the 3' overhang enzymes ``x`` for which ``x % cls`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Screen the requested batch; the previous code always iterated over
        # AllEnzymes, which silently ignored the ``batch`` argument.
        return sorted(x for x in iter(batch) if x.is_3overhang() and x % cls)

    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Only another Ov3 subclass can be compatible; the overhang comparison
        itself is delegated to ``cls._mod2``.
        """
        #
        # called by RE._mod1(other) when one of the enzymes is ambiguous
        #
        if issubclass(other, Ov3):
            return cls._mod2(other)
        return False
class Defined(AbstractCut):
    """Implement methods for enzymes with defined recognition site and cut.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT

    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the methods below take ``cls`` but no binding decorator is
    # visible in this view; presumably they are classmethods -- confirm.

    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Linear sequence: cut positions outside the sequence are discarded.
        Circular sequence: such positions are wrapped back into range.
        """
        #
        # The cut positions are obtained by adding the site-to-cut distance to
        # the site position (the _modify and _rev_modify methods), so some of
        # them can fall outside the sequence.
        # Linear: those cuts are removed altogether.
        # Circular: the site itself is in the sequence, so the position is
        # shifted by the sequence length instead of being dropped.
        #
        size = len(cls.dna)
        if cls.dna.is_linear():
            # Skip the leading positions <= 1, then keep the run <= size.
            kept = itertools.dropwhile(lambda pos: pos <= 1, cls.results)
            cls.results = list(itertools.takewhile(lambda pos: pos <= size, kept))
            return
        # Leading positions below 1 wrap forward ...
        for i, pos in enumerate(cls.results):
            if pos >= 1:
                break
            cls.results[i] += size
        # ... and trailing positions beyond the end wrap backward.
        for offset, pos in enumerate(cls.results[::-1], start=1):
            if pos <= size:
                break
            cls.results[-offset] -= size

    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always True for this class.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return True

    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return False

    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return False

    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        plus_cut = cls.fst5
        minus_cut = cls.fst3
        site = cls.site
        if cls.cut_twice():
            return "cut twice, not yet implemented sorry."
        if cls.is_5overhang():
            if plus_cut == minus_cut == 0:
                return "N^" + cls.site + "_N"
            if minus_cut == 0:
                return site[:plus_cut] + "^" + site[plus_cut:] + "_N"
            return (
                site[:plus_cut]
                + "^"
                + site[plus_cut:minus_cut]
                + "_"
                + site[minus_cut:]
            )
        if cls.is_blunt():
            return site[:plus_cut] + "^_" + site[plus_cut:]
        # Remaining case: the (-) strand mark comes first.
        if plus_cut == minus_cut == 0:
            return "N_" + site + "^N"
        return (
            site[:minus_cut] + "_" + site[minus_cut:plus_cut] + "^" + site[plus_cut:]
        )

    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Identical overhang sequences are compatible. Otherwise the decision is
        handed to ``other._mod2`` when ``other`` is Ambiguous, and is False in
        every remaining case.
        """
        #
        # called by RE._mod1(other) when one of the enzymes is ambiguous
        #
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(cls)
        return False
class Ambiguous(AbstractCut):
    """Implement methods for enzymes that produce variable overhangs.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.

    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the methods below take ``cls`` but no binding decorator is
    # visible in this view; presumably they are classmethods -- confirm.

    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Drop the site that are situated outside the sequence in linear
        sequence. Modify the index for site in circular sequences.
        """
        length = len(cls.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if cls.dna.is_linear():
            # Skip the leading positions <= 1, then keep the run <= length.
            cls.results = list(drop(lambda x: x <= 1, cls.results))
            cls.results = list(take(lambda x: x <= length, cls.results))
        else:
            # Leading positions below 1 are shifted forward by one length.
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            # Trailing positions beyond the end are shifted backward.
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index + 1)] -= length
                else:
                    break

    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always False for this class.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return False

    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always True for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return True

    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return False

    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        The overhang of ``cls`` is turned into a regular expression ("N"
        becomes ".", the codes in "RYWMSKHDBV" become a character class built
        from ``matching``) and matched against ``other.ovhgseq``. Overhangs of
        different lengths are never compatible.
        """
        #
        # called by RE._mod1(other) when one of the enzymes is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        # Build the pattern in a single pass instead of re-splitting and
        # re-joining the whole string once per base.
        pieces = []
        for base in cls.ovhgseq:
            if base == "N":
                pieces.append(".")
            elif base in "RYWMSKHDBV":
                pieces.append("[" + matching[base] + "]")
            else:
                pieces.append(base)
        return bool(re.match("".join(pieces), other.ovhgseq))

    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        Raises ValueError for a blunt cutter whose cut offset lies within
        ``0 <= fst5 <= len(cls)``.
        """
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # ``shown`` (not ``re``) so the regular-expression module is not
        # shadowed inside this method.
        if cls.cut_twice():
            shown = "cut twice, not yet implemented sorry."
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                shown = "N^" + site + "_N"
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                shown = site[:f5] + "^" + site[f5:f3] + "_" + site[f3:]
            elif 0 <= f5 <= length:
                shown = site[:f5] + "^" + site[f5:] + f3 * "N" + "_N"
            elif 0 <= f3 + length <= length:
                shown = "N^" + abs(f5) * "N" + site[:f3] + "_" + site[f3:]
            elif f3 + length < 0:
                shown = "N^" + abs(f5) * "N" + "_" + abs(length + f3) * "N" + site
            elif f5 > length:
                shown = (
                    site + (f5 - length) * "N" + "^" + (length + f3 - f5) * "N" + "_N"
                )
            else:
                shown = "N^" + abs(f5) * "N" + site + f3 * "N" + "_N"
        elif cls.is_blunt():
            if f5 < 0:
                shown = "N^_" + abs(f5) * "N" + site
            elif f5 > length:
                shown = site + (f5 - length) * "N" + "^_N"
            else:
                # The message now names this method; it used to mention a
                # nonexistent ``easyrepr()``.
                raise ValueError("%s.elucidate() : error f5=%i" % (cls.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    shown = "N_" + site + "^N"
                else:
                    shown = site + "_" + (f5 - length) * "N" + "^N"
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                shown = site[:f3] + "_" + site[f3:f5] + "^" + site[f5:]
            elif 0 < f3 + length <= length:
                shown = site[:f3] + "_" + site[f3:] + (f5 - length) * "N" + "^N"
            elif 0 <= f5 <= length:
                shown = "N_" + "N" * (f3 + length) + site[:f5] + "^" + site[f5:]
            elif f3 > 0:
                shown = site + f3 * "N" + "_" + (f5 - f3 - length) * "N" + "^N"
            elif f5 < 0:
                shown = (
                    "N_" + abs(f3 - f5 + length) * "N" + "^" + abs(f5) * "N" + site
                )
            else:
                shown = (
                    "N_" + abs(f3 + length) * "N" + site + (f5 - length) * "N" + "^N"
                )
        return shown
class NotDefined(AbstractCut):
    """Implement methods for enzymes with non-characterized overhangs.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the methods below take ``cls`` but no binding decorator is
    # visible in this view; presumably they are classmethods -- confirm.

    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Linear sequence: the results are left untouched.
        Circular sequence: positions below 1 or beyond the sequence length are
        wrapped back into range.
        """
        if cls.dna.is_linear():
            return
        length = len(cls.dna)
        # Leading positions below 1 are shifted forward by one length.
        for index, location in enumerate(cls.results):
            if location < 1:
                cls.results[index] += length
            else:
                break
        # Walk the results from the END (``[::-1]``) so that ``location`` is
        # the very element addressed by ``-(index + 1)``. The former
        # ``[:-1]`` slice read the values from the front while adjusting
        # elements counted from the back.
        for index, location in enumerate(cls.results[::-1]):
            if location > length:
                cls.results[-(index + 1)] -= length
            else:
                break

    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always False for this class.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return False

    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return False

    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always True for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return True

    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Always raises ValueError: with an undefined overhang there is nothing
        to compare ``other`` against.
        """
        #
        # Normally we should not arrive here. The overhang is not defined, so
        # the enzyme is compatible with nobody; an error is raised rather than
        # quietly returning False.
        #
        raise ValueError(
            "%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
            % (str(cls), str(other), str(cls))
        )

    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        return f"? {cls.site} ?"
class Commercially_available(AbstractCut):
    """Implement methods for enzymes which are commercially available.

    Internal use only. Not meant to be instantiated.
    """

    #
    # Recent additions to Rebase make this naming convention uncertain.
    # "Enzymes which have a supplier" may be the better description.
    #
    # NOTE(review): the methods below take ``cls`` but no binding decorator is
    # visible in this view; presumably they are classmethods -- confirm.

    def suppliers(cls):
        """Print the suppliers of the enzyme, one per line.

        Each line is the first field of the ``suppliers_dict`` entry for a
        code found in ``cls.suppl``, followed by a comma.
        """
        for code in cls.suppl:
            entry = suppliers_dict[code]
            print(entry[0] + ",")

    def supplier_list(cls):
        """Return a list of suppliers of the enzyme.

        The list follows the iteration order of ``suppliers_dict`` and keeps
        the first field of every entry whose code is in ``cls.suppl``.
        """
        found = []
        for code, entry in suppliers_dict.items():
            if code in cls.suppl:
                found.append(entry[0])
        return found

    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet.
        """
        return None

    def is_comm(cls):
        """Return if enzyme is commercially available.

        True if RE has suppliers; always True for this class.
        """
        return True
class Not_available(AbstractCut):
    """Implement methods for enzymes which are not commercially available.

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): takes no argument at all; presumably a staticmethod whose
    # decorator is not visible in this view -- confirm.
    def suppliers():
        """Print a list of suppliers of the enzyme.

        There is none to print for this class, so nothing is printed and
        None is returned.
        """
        return None

    def supplier_list(cls):
        """Return a list of suppliers of the enzyme (always empty here)."""
        return []

    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet. Always raises TypeError for this class.
        """
        raise TypeError("Enzyme not commercially available.")

    def is_comm(cls):
        """Return if enzyme is commercially available.

        True if RE has suppliers; always False for this class.
        """
        return False
| ############################################################################### | |
| # # | |
| # Restriction Batch # | |
| # # | |
| ############################################################################### | |
class RestrictionBatch(set):
    """Class for operations on more than one enzyme."""

    def __init__(self, first=(), suppliers=()):
        """Initialize empty RB or pre-fill with enzymes (from supplier).

        ``first`` is an iterable of enzymes (or enzyme names) that are each
        passed through ``format``. ``suppliers`` is an iterable of supplier
        codes; the enzymes listed for each code in ``suppliers_dict`` are
        added as well. An unknown code raises KeyError.
        """
        first = [self.format(x) for x in first]
        # NOTE(review): eval() turns the names stored in suppliers_dict into
        # the objects of the same name; safe only while that table is trusted.
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        self.suppliers = [x for x in suppliers if x in suppliers_dict]

    def __str__(self):
        """Return a readable representation of the ``RestrictionBatch``.

        Fewer than five enzymes: every name joined with "+". Otherwise the
        first two and the last two names, separated by "...".
        """
        names = self.elements()
        if len(self) < 5:
            return "+".join(names)
        return "...".join(("+".join(names[:2]), "+".join(names[-2:])))

    def __repr__(self):
        """Represent ``RestrictionBatch`` class as a string for debugging."""
        return f"RestrictionBatch({self.elements()})"

    def __contains__(self, other):
        """Implement ``in`` for ``RestrictionBatch``."""
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """Override '/' operator to use as search method."""
        return self.search(other)

    def __rdiv__(self, other):
        """Override division with reversed operands to use as search method."""
        return self.search(other)

    def __truediv__(self, other):
        """Override Python 3 division operator to use as search method.

        Like __div__.
        """
        return self.search(other)

    def __rtruediv__(self, other):
        """As __truediv___, with reversed operands.

        Like __rdiv__.
        """
        return self.search(other)

    def get(self, enzyme, add=False):
        """Check if enzyme is in batch and return it.

        If add is True and enzyme is not in batch add enzyme to batch.
        If add is False (which is the default) only return enzyme.
        If enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        A ValueError is also raised when the enzyme is not in the batch and
        add is False.
        """
        e = self.format(enzyme)
        if e in self:
            return e
        if add:
            self.add(e)
            return e
        raise ValueError(f"enzyme {e.__name__} is not in RestrictionBatch")

    def lambdasplit(self, func):
        """Filter enzymes in batch with supplied function.

        The new batch will contain only the enzymes for which
        func return True.
        """
        new = RestrictionBatch()
        # Fill the set itself. The previous code stored the selection in a
        # ``_data`` attribute, which a ``set`` subclass never reads, so the
        # returned batch was always empty.
        set.update(new, filter(func, self))
        return new

    def add_supplier(self, letter):
        """Add all enzymes from a given supplier to batch.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        Returns None.
        Raise a KeyError if letter is not a supplier code.
        """
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for x in supplier[1]:
            # NOTE(review): eval() of a name taken from suppliers_dict; see
            # the remark in __init__.
            self.add_nocheck(eval(x))

    def current_suppliers(self):
        """List the current suppliers for the restriction batch.

        Return a sorted list of the suppliers which have been used to
        create the batch.
        """
        return sorted(suppliers_dict[x][0] for x in self.suppliers)

    def __iadd__(self, other):
        """Override '+=' for use with sets.

        b += other -> add other to b, check the type of other.
        """
        self.add(other)
        return self

    def __add__(self, other):
        """Override '+' for use with sets.

        b + other -> new RestrictionBatch.
        """
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """Remove enzyme from restriction batch.

        Safe set.remove method. Verify that other is a RestrictionType or can
        be evaluated to a RestrictionType.
        Raise a ValueError if other can not be evaluated to a RestrictionType.
        Raise a KeyError if other is not in B.
        """
        return set.remove(self, self.format(other))

    def add(self, other):
        """Add a restriction enzyme to the restriction batch.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        Raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """Add restriction enzyme to batch without checking its type."""
        return set.add(self, other)

    def format(self, y):
        """Evaluate enzyme (name) and return it (as RestrictionType).

        If y is a RestrictionType return y.
        If y can be evaluated to a RestrictionType return eval(y).
        Raise a ValueError in all other case.
        """
        try:
            if isinstance(y, RestrictionType):
                return y
            # SECURITY NOTE(review): eval() executes arbitrary expressions;
            # never feed this method text from an untrusted source.
            # Evaluate once and reuse the result: the former second
            # ``eval(y)`` failed with TypeError when ``y`` was not a string.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError(f"{y.__class__} is not a RestrictionType")

    def is_restriction(self, y):
        """Return if enzyme (name) is a known enzyme.

        True if y or eval(y) is a RestrictionType. False otherwise,
        including when y is a name that cannot be evaluated (this used to
        escape as NameError or SyntaxError).
        """
        try:
            self.format(y)
        except ValueError:
            return False
        return True

    def split(self, *classes, **flags):
        """Extract enzymes of a certain class and put in new RestrictionBatch.

        Every positional argument is a class the enzymes must derive from.
        Passing ``ClassName=False`` as keyword inverts the test for that
        class: the enzymes must then NOT derive from it.

        It works but it is slow, so it has really an interest when splitting
        over multiple conditions.
        """

        def splittest(element):
            # Keep the element only if it agrees with every requested class.
            return all(
                issubclass(element, klass) == bool(flags.get(klass.__name__, True))
                for klass in classes
            )

        new = RestrictionBatch()
        # Fill the set itself; the former ``new._data = ...`` left it empty.
        set.update(new, filter(splittest, self))
        return new

    def elements(self):
        """List the enzymes of the RestrictionBatch as list of strings.

        Give all the names of the enzymes in B sorted alphabetically.
        """
        return sorted(str(e) for e in self)

    def as_string(self):
        """List the names of the enzymes of the RestrictionBatch.

        Return a list of the name of the elements of the batch.
        """
        return [str(e) for e in self]

    # NOTE(review): ``suppl_codes`` and ``show_codes`` take ``cls``;
    # presumably classmethods whose decorators are not visible in this view
    # -- confirm.
    def suppl_codes(cls):
        """Return a dictionary with supplier codes.

        Letter code for the suppliers.
        """
        return {k: v[0] for k, v in suppliers_dict.items()}

    def show_codes(cls):
        """Print a list of supplier codes."""
        supply = [" = ".join(i) for i in cls.suppl_codes().items()]
        print("\n".join(supply))

    def search(self, dna, linear=True):
        """Return a dic of cutting sites in the seq for the batch enzymes.

        ``dna`` is either a ``DNA`` instance, which gets wrapped in a
        ``FormattedSeq`` using ``linear``, or a ``FormattedSeq``, whose own
        ``linear`` attribute is used. The result is cached: a second search
        with the same sequence text and topology returns the stored mapping.
        Any other type raises TypeError.
        """
        #
        # here we replace the search method of the individual enzymes
        # with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            # TODO - Why does this happen!
            # Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            key = (str(dna), linear)
            if key == self.already_mapped:
                return self.mapping
            self.already_mapped = key
            fseq = FormattedSeq(dna, linear)
            self.mapping = {x: x.search(fseq) for x in self}
            return self.mapping
        if isinstance(dna, FormattedSeq):
            key = (str(dna), dna.linear)
            if key == self.already_mapped:
                return self.mapping
            self.already_mapped = key
            self.mapping = {x: x.search(dna) for x in self}
            return self.mapping
        raise TypeError(f"Expected Seq or MutableSeq instance, got {type(dna)} instead")
| ############################################################################### | |
| # # | |
| # Restriction Analysis # | |
| # # | |
| ############################################################################### | |
# Default ``sequence`` and ``restrictionbatch`` arguments of
# Analysis.__init__: an empty sequence and an empty batch, created once here.
_empty_DNA = DNA("")
_restrictionbatch = RestrictionBatch()
| class Analysis(RestrictionBatch, PrintFormat): | |
| """Provide methods for enhanced analysis and pretty printing.""" | |
def __init__(
    self, restrictionbatch=_restrictionbatch, sequence=_empty_DNA, linear=True
):
    """Initialize an Analysis with RestrictionBatch and sequence.

    For most of the methods of this class if a dictionary is given it will
    be used as the base to calculate the results.
    If no dictionary is given a new analysis using the RestrictionBatch
    which has been given when the Analysis class has been instantiated,
    will be carried out and used.

    The batch is kept as ``self.rb`` and its enzymes are loaded into this
    set; a non-empty ``sequence`` is searched immediately.
    """
    RestrictionBatch.__init__(self, restrictionbatch)
    self.rb, self.sequence, self.linear = restrictionbatch, sequence, linear
    if not self.sequence:
        # Nothing to search yet.
        return
    self.search(self.sequence, self.linear)
def __repr__(self):
    """Represent ``Analysis`` class as a string.

    The string shows the repr of the batch and of the sequence, followed by
    the ``linear`` flag.
    """
    text = f"Analysis({self.rb!r},{self.sequence!r},{self.linear})"
    return text
| def _sub_set(self, wanted): | |
| """Filter result for keys which are in wanted (PRIVATE). | |
| Internal use only. Returns a dict. | |
| Screen the results through wanted set. | |
| Keep only the results for which the enzymes is in wanted set. | |
| """ | |
| # It seems that this method is not used in the whole class! | |
| return {k: v for k, v in self.mapping.items() if k in wanted} | |
| def _boundaries(self, start, end): | |
| """Set boundaries to correct values (PRIVATE). | |
| Format the boundaries for use with the methods that limit the | |
| search to only part of the sequence given to analyse. | |
| """ | |
| if not isinstance(start, int): | |
| raise TypeError(f"expected int, got {type(start)} instead") | |
| if not isinstance(end, int): | |
| raise TypeError(f"expected int, got {type(end)} instead") | |
| if start < 1: # Looks like this tries to do python list like indexing | |
| start += len(self.sequence) | |
| if end < 1: | |
| end += len(self.sequence) | |
| if start < end: | |
| pass | |
| else: | |
| start, end = end, start | |
| if start < end: | |
| return start, end, self._test_normal | |
| def _test_normal(self, start, end, site): | |
| """Test if site is between start and end (PRIVATE). | |
| Internal use only | |
| """ | |
| return start <= site < end | |
| def _test_reverse(self, start, end, site): | |
| """Test if site is between end and start, for circular sequences (PRIVATE). | |
| Internal use only. | |
| """ | |
| return start <= site <= len(self.sequence) or 1 <= site < end | |
| def format_output(self, dct=None, title="", s1=""): | |
| """Collect data and pass to PrintFormat. | |
| If dct is not given the full dictionary is used. | |
| """ | |
| if not dct: | |
| dct = self.mapping | |
| return PrintFormat.format_output(self, dct, title, s1) | |
| def print_that(self, dct=None, title="", s1=""): | |
| """Print the output of the analysis. | |
| If dct is not given the full dictionary is used. | |
| s1: Title for non-cutting enzymes | |
| This method prints the output of A.format_output() and it is here | |
| for backwards compatibility. | |
| """ | |
| print(self.format_output(dct, title, s1)) | |
| def change(self, **what): | |
| """Change parameters of print output. | |
| It is possible to change the width of the shell by setting | |
| self.ConsoleWidth to what you want. | |
| self.NameWidth refer to the maximal length of the enzyme name. | |
| Changing one of these parameters here might not give the results | |
| you expect. In which case, you can settle back to a 80 columns shell | |
| or try to change self.Cmodulo and self.PrefWidth in PrintFormat until | |
| you get it right. | |
| """ | |
| for k, v in what.items(): | |
| if k in ("NameWidth", "ConsoleWidth"): | |
| setattr(self, k, v) | |
| self.Cmodulo = self.ConsoleWidth % self.NameWidth | |
| self.PrefWidth = self.ConsoleWidth - self.Cmodulo | |
| elif k == "sequence": | |
| setattr(self, "sequence", v) | |
| self.search(self.sequence, self.linear) | |
| elif k == "rb": | |
| self = Analysis.__init__(self, v, self.sequence, self.linear) | |
| elif k == "linear": | |
| setattr(self, "linear", v) | |
| self.search(self.sequence, v) | |
| elif k in ("Indent", "Maxsize"): | |
| setattr(self, k, v) | |
| elif k in ("Cmodulo", "PrefWidth"): | |
| raise AttributeError( | |
| f"To change {k}, change NameWidth and/or ConsoleWidth" | |
| ) | |
| else: | |
| raise AttributeError(f"Analysis has no attribute {k}") | |
| def full(self, linear=True): | |
| """Perform analysis with all enzymes of batch and return all results. | |
| Full Restriction Map of the sequence, as a dictionary. | |
| """ | |
| return self.mapping | |
| def blunt(self, dct=None): | |
| """Return only cuts that have blunt ends.""" | |
| if not dct: | |
| dct = self.mapping | |
| return {k: v for k, v in dct.items() if k.is_blunt()} | |
| def overhang5(self, dct=None): | |
| """Return only cuts that have 5' overhangs.""" | |
| if not dct: | |
| dct = self.mapping | |
| return {k: v for k, v in dct.items() if k.is_5overhang()} | |
| def overhang3(self, dct=None): | |
| """Return only cuts that have 3' overhangs.""" | |
| if not dct: | |
| dct = self.mapping | |
| return {k: v for k, v in dct.items() if k.is_3overhang()} | |
| def defined(self, dct=None): | |
| """Return only results from enzymes that produce defined overhangs.""" | |
| if not dct: | |
| dct = self.mapping | |
| return {k: v for k, v in dct.items() if k.is_defined()} | |
| def with_sites(self, dct=None): | |
| """Return only results from enzyme with at least one cut.""" | |
| if not dct: | |
| dct = self.mapping | |
| return {k: v for k, v in dct.items() if v} | |
| def without_site(self, dct=None): | |
| """Return only results from enzymes that don't cut the sequence.""" | |
| if not dct: | |
| dct = self.mapping | |
| return {k: v for k, v in dct.items() if not v} | |
| def with_N_sites(self, N, dct=None): | |
| """Return only results from enzymes that cut the sequence N times.""" | |
| if not dct: | |
| dct = self.mapping | |
| return {k: v for k, v in dct.items() if len(v) == N} | |
| def with_number_list(self, list, dct=None): | |
| """Return only results from enzymes that cut (x,y,z,...) times.""" | |
| if not dct: | |
| dct = self.mapping | |
| return {k: v for k, v in dct.items() if len(v) in list} | |
| def with_name(self, names, dct=None): | |
| """Return only results from enzymes which names are listed.""" | |
| for i, enzyme in enumerate(names): | |
| if enzyme not in AllEnzymes: | |
| warnings.warn(f"no data for the enzyme: {enzyme}", BiopythonWarning) | |
| del names[i] | |
| if not dct: | |
| return RestrictionBatch(names).search(self.sequence, self.linear) | |
| return {n: dct[n] for n in names if n in dct} | |
| def with_site_size(self, site_size, dct=None): | |
| """Return only results form enzymes with a given site size.""" | |
| sites = [name for name in self if name.size == site_size] | |
| if not dct: | |
| return RestrictionBatch(sites).search(self.sequence) | |
| return {k: v for k, v in dct.items() if k in site_size} | |
| def only_between(self, start, end, dct=None): | |
| """Return only results from enzymes that only cut within start, end.""" | |
| start, end, test = self._boundaries(start, end) | |
| if not dct: | |
| dct = self.mapping | |
| d = dict(dct) | |
| for key, sites in dct.items(): | |
| if not sites: | |
| del d[key] | |
| continue | |
| for site in sites: | |
| if test(start, end, site): | |
| continue | |
| else: | |
| del d[key] | |
| break | |
| return d | |
| def between(self, start, end, dct=None): | |
| """Return only results from enzymes that cut at least within borders. | |
| Enzymes that cut the sequence at least in between start and end. | |
| They may cut outside as well. | |
| """ | |
| start, end, test = self._boundaries(start, end) | |
| d = {} | |
| if not dct: | |
| dct = self.mapping | |
| for key, sites in dct.items(): | |
| for site in sites: | |
| if test(start, end, site): | |
| d[key] = sites | |
| break | |
| continue | |
| return d | |
| def show_only_between(self, start, end, dct=None): | |
| """Return only results from within start, end. | |
| Enzymes must cut inside start/end and may also cut outside. However, | |
| only the cutting positions within start/end will be returned. | |
| """ | |
| d = [] | |
| if start <= end: | |
| d = [ | |
| (k, [vv for vv in v if start <= vv <= end]) | |
| for k, v in self.between(start, end, dct).items() | |
| ] | |
| else: | |
| d = [ | |
| (k, [vv for vv in v if start <= vv or vv <= end]) | |
| for k, v in self.between(start, end, dct).items() | |
| ] | |
| return dict(d) | |
| def only_outside(self, start, end, dct=None): | |
| """Return only results from enzymes that only cut outside start, end. | |
| Enzymes that cut the sequence outside of the region | |
| in between start and end but do not cut inside. | |
| """ | |
| start, end, test = self._boundaries(start, end) | |
| if not dct: | |
| dct = self.mapping | |
| d = dict(dct) | |
| for key, sites in dct.items(): | |
| if not sites: | |
| del d[key] | |
| continue | |
| for site in sites: | |
| if test(start, end, site): | |
| del d[key] | |
| break | |
| else: | |
| continue | |
| return d | |
| def outside(self, start, end, dct=None): | |
| """Return only results from enzymes that at least cut outside borders. | |
| Enzymes that cut outside the region in between start and end. | |
| They may cut inside as well. | |
| """ | |
| start, end, test = self._boundaries(start, end) | |
| if not dct: | |
| dct = self.mapping | |
| d = {} | |
| for key, sites in dct.items(): | |
| for site in sites: | |
| if test(start, end, site): | |
| continue | |
| else: | |
| d[key] = sites | |
| break | |
| return d | |
| def do_not_cut(self, start, end, dct=None): | |
| """Return only results from enzymes that don't cut between borders.""" | |
| if not dct: | |
| dct = self.mapping | |
| d = self.without_site() | |
| d.update(self.only_outside(start, end, dct)) | |
| return d | |
#
# The restriction enzyme classes are created dynamically when the module is
# imported. Below is the machinery which allows the creation of the
# restriction-enzyme classes.
#
# Restriction_Dictionary holds two dictionaries: one for the types (called
# pseudo-types here, as they really correspond to the values that instances
# of RestrictionType can take) and one for the enzymes. The reason is
# efficiency: the bases are evaluated only once per pseudo-type.
#
# Restriction is nevertheless a very inefficient module at import. Remember
# that around 660 classes (more or less the size of Rebase) have to be
# created dynamically. This processing takes place only once, though.
# The inefficiency is largely compensated by the use of a metaclass, which
# provides a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else branches in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO
# switch) as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()  # commercial enzymes
NonComm = RestrictionBatch()  # not available commercially

for TYPE, (bases, enzymes) in typedict.items():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...).
    # Their names are not important and only serve to differentiate the keys
    # in the dict. All the pseudo-types are in fact RestrictionType.
    # The names are not used afterwards and the pseudo-types are not kept in
    # the locals() dictionary. It is therefore impossible to import them.
    # If you have a look at the dictionary, you will see that not all the
    # types are present, as those without corresponding enzymes have been
    # removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First, evaluate the base names to get the classes themselves.
    #
    bases = tuple(eval(x) for x in bases)
    #
    # Then create the particular value of RestrictionType shared by all the
    # classes listed in enzymes.
    #
    T = type.__new__(RestrictionType, "RestrictionType", bases, {})
    for k in enzymes:
        #
        # Go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # Add the enzyme to the batch matching its commercial availability.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        (CommOnly if newenz.is_comm() else NonComm).add_nocheck(newenz)

#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = RestrictionBatch(CommOnly)
AllEnzymes.update(NonComm)

#
# Now, place the enzymes in locals so they can be imported.
#
names = list(map(str, AllEnzymes))
locals().update(zip(names, AllEnzymes))

__all__ = (
    "FormattedSeq",
    "Analysis",
    "RestrictionBatch",
    "AllEnzymes",
    "CommOnly",
    "NonComm",
    *names,
)

del k, enzymes, TYPE, bases, names