Spaces:
No application file
No application file
| # Copyright 2005 by Jonathan Taylor. | |
| # All rights reserved. | |
| # This code is part of the Biopython distribution and governed by its | |
| # license. Please see the LICENSE file that should have been included | |
| # as part of this package. | |
| """Cleaved amplified polymorphic sequence (CAPS) markers. | |
| A CAPS marker is a location (a DifferentialCutsite, as described below) and a | |
| set of primers that can be used to visualize it. More information can | |
| be found in the paper `Konieczny and Ausubel (1993)`_ (PMID 8106085). | |
| .. _`Konieczny and Ausubel (1993)`: https://doi.org/10.1046/j.1365-313X.1993.04020403.x | |
| """ | |
| from Bio.Align import MultipleSeqAlignment | |
| from Bio.Seq import Seq | |
class DifferentialCutsite:
    """A position in an alignment where an enzyme cuts only some sequences.

    To qualify, the enzyme must cut at least one of the aligned sequences
    at this position and be unable to cut at least one other.

    Members:
     - start - Position of the site within the alignment.
     - enzyme - The enzyme responsible for the cut.
     - cuts_in - Indexes (into the alignment) of the sequences the
       enzyme cuts.
     - blocked_in - Indexes (into the alignment) of the sequences the
       enzyme cannot cut.

    """

    def __init__(self, **kwds):
        """Build a DifferentialCutsite from keyword arguments.

        Every member named in the class description must be supplied as a
        keyword; a missing one raises KeyError. ``start`` is converted with
        ``int``.
        """
        self.start = int(kwds["start"])
        # The remaining members are stored exactly as supplied.
        for member in ("enzyme", "cuts_in", "blocked_in"):
            setattr(self, member, kwds[member])
class AlignmentHasDifferentLengthsError(Exception):
    """Raised when the sequences of an alignment are not all the same length."""
class CAPSMap:
    """A map of an alignment showing all possible dcuts.

    Members:
     - alignment - The alignment that is mapped.
     - dcuts - A list of possible CAPS markers in the form of
       DifferentialCutsites.

    The constructor also stores ``sequences`` (one entry per alignment row),
    ``size`` (number of rows), ``length`` (number of columns) and
    ``enzymes`` (the enzymes used to build the map).

    """

    def __init__(self, alignment, enzymes=None):
        """Initialize the CAPSMap.

        Required:
         - alignment - The alignment to be mapped.

        Optional:
         - enzymes - List of enzymes to be used to create the map.
           Defaults to an empty list.

        Raises AlignmentHasDifferentLengthsError if the records of a
        MultipleSeqAlignment do not all have the same length.
        """
        if enzymes is None:
            enzymes = []

        if isinstance(alignment, MultipleSeqAlignment):
            self.sequences = [rec.seq for rec in alignment]
            self.size = len(self.sequences)
            # An empty alignment has no first record to measure; treat it as
            # zero columns wide instead of failing with IndexError.
            self.length = len(self.sequences[0]) if self.sequences else 0
            if any(len(seq) != self.length for seq in self.sequences):
                raise AlignmentHasDifferentLengthsError(
                    "all sequences in the alignment must have the same length"
                )
        else:  # Alignment object
            self.sequences = [Seq(s) for s in alignment]
            self.size, self.length = alignment.shape

        self.alignment = alignment
        self.enzymes = enzymes

        # look for dcuts
        self._digest()

    def _digest_with(self, enzyme):
        """Append the differential cutsites of one enzyme to ``self.dcuts``.

        ``enzyme`` must provide ``search(seq)``, returning an iterable of
        positions, and an ``fst5`` offset that is subtracted from each of
        them (presumably moving from the cut position back to the start of
        the recognition site - confirm against the enzyme class in use).

        ``self.dcuts`` must already exist; ``_digest`` creates it.
        """
        # One set of shifted positions per sequence, in alignment order.
        # Sets keep the membership test in the loop below cheap.
        cuts_by_seq = [
            {cut - enzyme.fst5 for cut in enzyme.search(seq)}
            for seq in self.sequences
        ]

        # Every distinct position seen in any sequence, in ascending order.
        all_cuts = sorted(set().union(*cuts_by_seq))

        for cut in all_cuts:
            # Split the sequence indexes by whether the enzyme cuts here.
            cuts_in = []
            blocked_in = []
            for index, seq_cuts in enumerate(cuts_by_seq):
                if cut in seq_cuts:
                    cuts_in.append(index)
                else:
                    blocked_in.append(index)

            # Only a position that is cut in some sequences but not in
            # others is a differential cutsite.
            if cuts_in and blocked_in:
                self.dcuts.append(
                    DifferentialCutsite(
                        start=cut,
                        enzyme=enzyme,
                        cuts_in=cuts_in,
                        blocked_in=blocked_in,
                    )
                )

    def _digest(self):
        """Rebuild ``self.dcuts`` from scratch using every enzyme in turn."""
        self.dcuts = []

        for enzyme in self.enzymes:
            self._digest_with(enzyme)