Spaces:
No application file
No application file
| # Copyright 2012 by Wibowo Arindrarto. All rights reserved. | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Bio.SearchIO object to model a single database hit.""" | |
| from itertools import chain | |
| from Bio.SearchIO._utils import allitems, optionalcascade, getattr_str | |
| from ._base import _BaseSearchObject | |
| from .hsp import HSP | |
class Hit(_BaseSearchObject):
    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.

    To have a quick look at a Hit and its contents, invoke ``print`` on it::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> print(hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]
                  1  3.3e-06     55.39     60          [0:60]               [13:73]

    You can invoke ``len`` on a Hit object to see how many HSP objects it
    contains::

        >>> len(hit)
        2

    Hit objects behave very similarly to Python lists. You can retrieve the
    HSP object inside a Hit using the HSP's integer index. Hit objects can
    also be sliced, which will return a new Hit object containing only the
    sliced HSPs::

        # HSP items inside the Hit can be retrieved using its integer index
        >>> hit[0]
        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

        # slicing returns a new Hit
        >>> hit
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
        >>> hit[:1]
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
        >>> print(hit[1:])
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  3.3e-06     55.39     60          [0:60]               [13:73]

    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
    Python's built-in ``filter`` and ``map`` except that they return a new Hit
    object instead of a list.

    Here is an example of using ``filter`` to select for HSPs whose e-value is
    less than 1e-10::

        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
        >>> filtered_hit = hit.filter(evalue_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]

    There are also other methods which are counterparts of Python lists'
    methods with the same names: ``append``, ``index``, ``pop``, and ``sort``.
    Consult their respective documentation for more details and examples of
    their usage.

    """

    # attributes we don't want to transfer when creating a new Hit class
    # from this one
    _NON_STICKY_ATTRS = ("_items",)

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initialize a Hit object.

        :param hsps: HSP objects contained in the Hit object
        :type hsps: iterable yielding HSP
        :param id: hit ID
        :type id: string
        :param query_id: query ID
        :type query_id: string

        If multiple HSP objects are used for initialization, they must all
        have the same ``query_id``, ``query_description``, ``hit_id``, and
        ``hit_description`` properties; otherwise ``ValueError`` is raised.

        """
        # default attribute values
        self._id = id
        self._id_alt = []
        self._query_id = query_id
        self._description = None
        self._description_alt = []
        self._query_description = None
        self.attributes = {}
        self.dbxrefs = []

        # Materialise once: the HSPs are scanned several times below, and a
        # single-use iterator would be exhausted after the first scan, which
        # would silently produce an empty Hit.
        hsps = list(hsps)

        for attr in ("query_id", "query_description", "hit_id", "hit_description"):
            # HACK: setting the if clause to '> 1' allows for empty hit objects.
            # This makes it easier to work with file formats with unpredictable
            # hit-hsp ordering. The empty hit object itself is nonfunctional,
            # however, since all its cascading properties are empty.
            if len({getattr(hsp, attr) for hsp in hsps}) > 1:
                raise ValueError(
                    "Hit object can not contain HSPs with more than one %s." % attr
                )

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it
            self.append(hsp)

    def __repr__(self):
        """Return string representation of Hit object."""
        return f"Hit(id={self.id!r}, query_id={self.query_id!r}, {len(self)!r} hsps)"

    def __iter__(self):
        """Iterate over hsps."""
        return iter(self.hsps)

    def __len__(self):
        """Return number of hsps."""
        return len(self.hsps)

    def __bool__(self):
        """Return True if there are hsps."""
        return bool(self.hsps)

    def __contains__(self, hsp):
        """Return True if hsp in items."""
        return hsp in self._items

    def __str__(self):
        """Return a human readable summary of the Hit object."""
        lines = []

        # set query id line
        lines.append("Query: %s" % self.query_id)
        if self.query_description:
            line = " %s" % self.query_description
            # keep description lines within 80 characters
            line = line[:77] + "..." if len(line) > 80 else line
            lines.append(line)

        # set hit id line; the sequence length is appended only if available
        hid_line = " Hit: %s" % self.id
        try:
            seq_len = self.seq_len
        except AttributeError:
            pass
        else:
            hid_line += " (%i)" % seq_len
        lines.append(hid_line)
        if self.description:
            line = " %s" % self.description
            line = line[:77] + "..." if len(line) > 80 else line
            lines.append(line)

        # set attributes lines
        for key, value in sorted(self.attributes.items()):
            lines.append(f" {key}: {value}")

        # set dbxrefs line
        if self.dbxrefs:
            lines.append("Database cross-references: " + ", ".join(self.dbxrefs))

        # set hsp line and table
        if not self.hsps:
            lines.append(" HSPs: ?")
            return "\n".join(lines)

        rule = ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
        lines.append(" HSPs: %s %s %s %s %s %s" % rule)
        pattern = "%11s %8s %9s %6s %15s %21s"
        lines.append(
            pattern % ("#", "E-value", "Bit score", "Span", "Query range", "Hit range")
        )
        lines.append(pattern % rule)

        for idx, hsp in enumerate(self.hsps):
            # evalue
            evalue = getattr_str(hsp, "evalue", fmt="%.2g")
            # bitscore
            bitscore = getattr_str(hsp, "bitscore", fmt="%.2f")
            # alignment length
            aln_span = getattr_str(hsp, "aln_span")

            # query region; the column is 15 characters wide, longer ranges
            # are cut and marked with a trailing '~]'
            query_start = getattr_str(hsp, "query_start")
            query_end = getattr_str(hsp, "query_end")
            query_range = f"[{query_start}:{query_end}]"
            if len(query_range) > 15:
                query_range = query_range[:13] + "~]"

            # hit region; the column is 21 characters wide
            hit_start = getattr_str(hsp, "hit_start")
            hit_end = getattr_str(hsp, "hit_end")
            hit_range = f"[{hit_start}:{hit_end}]"
            if len(hit_range) > 21:
                hit_range = hit_range[:19] + "~]"

            # append the hsp row
            lines.append(
                pattern % (idx, evalue, bitscore, aln_span, query_range, hit_range)
            )

        return "\n".join(lines)

    def __getitem__(self, idx):
        """Return the HSP object at the given index.

        If ``idx`` is a slice, a new Hit holding the sliced HSPs is returned
        instead, with this Hit's attributes transferred onto it.
        """
        # if key is slice, return a new Hit instance
        if isinstance(idx, slice):
            obj = self.__class__(self.hsps[idx])
            self._transfer_attrs(obj)
            return obj
        return self._items[idx]

    def __setitem__(self, idx, hsps):
        """Assign hsps to index idx.

        :param idx: position(s) to overwrite
        :type idx: int or slice
        :param hsps: a single HSP for an integer index, or an iterable of
            HSP objects for a slice

        Every assigned object is validated first. A ``TypeError`` is raised
        when the value does not fit the kind of index, e.g. a list of HSPs
        assigned to an integer index.

        """
        if isinstance(idx, slice):
            # Slice assignment takes a collection. Materialise it so that it
            # can be validated first and then stored; every element has to be
            # an HSP, so passing a bare HSP here is rejected rather than
            # having its own elements inserted into the Hit.
            hsps = list(hsps)
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            # A single slot holds a single HSP; a list/tuple fails validation
            # here instead of being stored as one (invalid) item.
            self._validate_hsp(hsps)
        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Delete item of index idx."""
        del self._items[idx]

    # hsp properties #
    def _validate_hsp(self, hsp):
        """Validate an HSP object (PRIVATE).

        Valid HSP objects have the same hit_id as the Hit object ID and the
        same query_id as the Hit object's query_id. The hit and query
        descriptions are checked in the same way.

        The ID/description checks only run once the Hit already holds at
        least one HSP. For each checked attribute, if the Hit's own value is
        still ``None`` it is taken over from the HSP instead of compared.

        Raises ``TypeError`` if ``hsp`` is not an HSP instance and
        ``ValueError`` if one of the compared values differs.
        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work
        if self._items:
            if self.id is not None:
                if hsp.hit_id != self.id:
                    raise ValueError(
                        "Expected HSP with hit ID %r, found %r instead."
                        % (self.id, hsp.hit_id)
                    )
            else:
                self.id = hsp.hit_id

            if self.description is not None:
                if hsp.hit_description != self.description:
                    raise ValueError(
                        "Expected HSP with hit description %r, found %r instead."
                        % (self.description, hsp.hit_description)
                    )
            else:
                self.description = hsp.hit_description

            if self.query_id is not None:
                if hsp.query_id != self.query_id:
                    raise ValueError(
                        "Expected HSP with query ID %r, found %r instead."
                        % (self.query_id, hsp.query_id)
                    )
            else:
                self.query_id = hsp.query_id

            if self.query_description is not None:
                if hsp.query_description != self.query_description:
                    raise ValueError(
                        "Expected HSP with query description %r, found %r instead."
                        % (self.query_description, hsp.query_description)
                    )
            else:
                self.query_description = hsp.query_description

    # properties #
    description = optionalcascade(
        "_description", "hit_description", """Hit description"""
    )
    query_description = optionalcascade(
        "_query_description",
        "query_description",
        """Description of the query that produced the hit""",
    )
    id = optionalcascade("_id", "hit_id", """Hit ID string.""")
    query_id = optionalcascade(
        "_query_id", "query_id", """ID string of the query that produced the hit"""
    )
    # returns all hsps
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    # NOTE(review): the three accessors below are documented like attributes;
    # if they are meant to be properties, ``@property`` decorators may have
    # been lost from this copy of the file -- confirm against upstream. They
    # are kept as plain methods here so the visible interface is unchanged.
    def id_all(self):
        """Return the Hit ID followed by its alternative ID(s)."""
        return [self.id] + self._id_alt

    def description_all(self):
        """Return the Hit description followed by its alternative descriptions."""
        return [self.description] + self._description_alt

    def fragments(self):
        """Access the HSPFragment objects contained in the Hit."""
        return list(chain(*self._items))

    # public methods #
    def append(self, hsp):
        """Add a HSP object to the end of Hit.

        Parameters
        hsp -- HSP object to append.

        Any HSP object appended must have the same ``hit_id`` property as the
        Hit object's ``id`` property and the same ``query_id`` property as the
        Hit object's ``query_id`` property.

        """
        self._validate_hsp(hsp)
        self._items.append(hsp)

    def filter(self, func=None):
        """Create new Hit object whose HSP objects pass the filter function.

        :param func: function for filtering
        :type func: callable, accepts HSP, returns bool

        ``filter`` is analogous to Python's built-in ``filter`` function, except
        that instead of returning a list it returns a ``Hit`` object. If no
        HSP passes the filter, ``None`` is returned. Here is an example of
        using ``filter`` to select for HSPs having bitscores bigger than 60::

            >>> from Bio import SearchIO
            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
            >>> hit = qresult[3]
            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
            >>> filtered_hit = hit.filter(bitscore_filter)
            >>> len(hit)
            2
            >>> len(filtered_hit)
            1
            >>> print(filtered_hit)
            Query: 33211
             mir_1
             Hit: gi|301171322|ref|NR_035857.1| (86)
             Pan troglodytes microRNA mir-520c (MIR520C), microRNA
             HSPs: ---- -------- --------- ------ --------------- ---------------------
                      #  E-value Bit score   Span     Query range             Hit range
                   ---- -------- --------- ------ --------------- ---------------------
                      0  8.9e-20    100.47     60          [1:61]               [13:73]

        """
        hsps = list(filter(func, self.hsps))
        if not hsps:
            # nothing passed the filter: no Hit is created
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def index(self, hsp):
        """Return the index of a given HSP object, zero-based.

        :param hsp: object to look up
        :type hsp: HSP

        """
        return self._items.index(hsp)

    def map(self, func=None):
        """Create new Hit object, mapping the given function to its HSPs.

        :param func: function for mapping
        :type func: callable, accepts HSP, returns HSP

        ``map`` is analogous to Python's built-in ``map`` function. It is applied to
        all HSPs contained in the Hit object and returns a new Hit object.
        Without ``func`` the HSPs are carried over unchanged. ``None`` is
        returned if the Hit contains no HSPs.

        """
        hsps = self.hsps[:]  # this creates a shallow copy
        if func is not None:
            hsps = [func(x) for x in hsps]
        if not hsps:
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def pop(self, index=-1):
        """Remove and return the HSP object at the specified index.

        :param index: index of HSP object to pop
        :type index: int

        """
        return self._items.pop(index)

    def sort(self, key=None, reverse=False, in_place=True):
        """Sort the HSP objects.

        :param key: sorting function
        :type key: callable, accepts HSP, returns key for sorting
        :param reverse: whether to reverse sorting results or no
        :type reverse: bool
        :param in_place: whether to do in-place sorting or no
        :type in_place: bool

        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
        method, and then returns ``None``. If you set the ``in_place``
        argument to False, it will return a new, sorted Hit object and keep
        the initial one unsorted.

        """
        if in_place:
            self._items.sort(key=key, reverse=reverse)
            return None
        hsps = self.hsps[:]
        hsps.sort(key=key, reverse=reverse)
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj
# Executed directly as a script (not imported): run this module's doctests.
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()