Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /SearchIO /_model /hit.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 2 years ago

raw

history blame contribute delete

17.3 kB

	# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Bio.SearchIO object to model a single database hit."""


	from itertools import chain

	from Bio.SearchIO._utils import allitems, optionalcascade, getattr_str

	from ._base import _BaseSearchObject
	from .hsp import HSP


	class Hit(_BaseSearchObject):
	    """Class representing a single database hit of a search result.

	    Hit objects are the second-level container in the SearchIO module. They
	    are the objects contained within a QueryResult (see QueryResult). They
	    themselves are containers for HSP objects and will contain at least one
	    HSP.

	    To have a quick look at a Hit and its contents, invoke ``print`` on it::

	        >>> from Bio import SearchIO
	        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
	        >>> hit = qresult[3]
	        >>> print(hit)
	        Query: 33211
	        mir_1
	        Hit: gi|301171322|ref|NR_035857.1| (86)
	        Pan troglodytes microRNA mir-520c (MIR520C), microRNA
	        HSPs: ---- -------- --------- ------ --------------- ---------------------
	        # E-value Bit score Span Query range Hit range
	        ---- -------- --------- ------ --------------- ---------------------
	        0 8.9e-20 100.47 60 [1:61] [13:73]
	        1 3.3e-06 55.39 60 [0:60] [13:73]

	    You can invoke ``len`` on a Hit object to see how many HSP objects it contains::

	        >>> len(hit)
	        2

	    Hit objects behave very similarly to Python lists. You can retrieve the HSP
	    object inside a Hit using the HSP's integer index. Hit objects can also be
	    sliced, which will return a new Hit object containing only the sliced HSPs::

	        # HSP items inside the Hit can be retrieved using its integer index
	        >>> hit[0]
	        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

	        # slicing returns a new Hit
	        >>> hit
	        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
	        >>> hit[:1]
	        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
	        >>> print(hit[1:])
	        Query: 33211
	        mir_1
	        Hit: gi|301171322|ref|NR_035857.1| (86)
	        Pan troglodytes microRNA mir-520c (MIR520C), microRNA
	        HSPs: ---- -------- --------- ------ --------------- ---------------------
	        # E-value Bit score Span Query range Hit range
	        ---- -------- --------- ------ --------------- ---------------------
	        0 3.3e-06 55.39 60 [0:60] [13:73]

	    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
	    Python's built-in ``filter`` and ``map`` except that they return a new Hit
	    object instead of a list.

	    Here is an example of using ``filter`` to select for HSPs whose e-value is
	    less than 1e-10::

	        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
	        >>> filtered_hit = hit.filter(evalue_filter)
	        >>> len(hit)
	        2
	        >>> len(filtered_hit)
	        1
	        >>> print(filtered_hit)
	        Query: 33211
	        mir_1
	        Hit: gi|301171322|ref|NR_035857.1| (86)
	        Pan troglodytes microRNA mir-520c (MIR520C), microRNA
	        HSPs: ---- -------- --------- ------ --------------- ---------------------
	        # E-value Bit score Span Query range Hit range
	        ---- -------- --------- ------ --------------- ---------------------
	        0 8.9e-20 100.47 60 [1:61] [13:73]

	    There are also other methods which are counterparts of Python lists' methods
	    with the same names: ``append``, ``index``, ``pop``, and ``sort``. Consult their
	    respective documentations for more details and examples of their usage.

	    """

	    # attributes we don't want to transfer when creating a new Hit class
	    # from this one
	    _NON_STICKY_ATTRS = ("_items",)

	    def __init__(self, hsps=(), id=None, query_id=None):
	        """Initialize a Hit object.

	        :param hsps: HSP objects contained in the Hit object
	        :type hsps: iterable yielding HSP
	        :param id: hit ID
	        :type id: string
	        :param query_id: query ID
	        :type query_id: string
	        :raises ValueError: if the given HSPs disagree on ``query_id``,
	            ``query_description``, ``hit_id``, or ``hit_description``
	        :raises TypeError: if any element of ``hsps`` is not an HSP

	        If multiple HSP objects are used for initialization, they must all
	        have the same ``query_id``, ``query_description``, ``hit_id``, and
	        ``hit_description`` properties.
	        """
	        # default attribute values
	        self._id = id
	        self._id_alt = []
	        self._query_id = query_id
	        self._description = None
	        self._description_alt = []
	        self._query_description = None
	        self.attributes = {}
	        self.dbxrefs = []

	        # ``hsps`` is walked once per checked attribute and once more when
	        # the items are stored, so materialize it first. Without this a
	        # single-use iterator is exhausted by the first check and the Hit
	        # silently ends up empty.
	        hsps = list(hsps)

	        for attr in ("query_id", "query_description", "hit_id", "hit_description"):
	            # HACK: setting the if clause to '> 1' allows for empty hit objects.
	            # This makes it easier to work with file formats with unpredictable
	            # hit-hsp ordering. The empty hit object itself is nonfunctional,
	            # however, since all its cascading properties are empty.
	            if len({getattr(hsp, attr) for hsp in hsps}) > 1:
	                raise ValueError(
	                    "Hit object can not contain HSPs with more than one %s." % attr
	                )

	        self._items = []
	        for hsp in hsps:
	            # append() validates each HSP before storing it, so no separate
	            # validation call is needed here
	            self.append(hsp)

	    def __repr__(self):
	        """Return string representation of Hit object."""
	        return f"Hit(id={self.id!r}, query_id={self.query_id!r}, {len(self)!r} hsps)"

	    def __iter__(self):
	        """Iterate over hsps."""
	        return iter(self.hsps)

	    def __len__(self):
	        """Return number of hsps."""
	        return len(self.hsps)

	    def __bool__(self):
	        """Return True if there are hsps."""
	        return bool(self.hsps)

	    def __contains__(self, hsp):
	        """Return True if hsp in items."""
	        return hsp in self._items

	    def __str__(self):
	        """Return a human readable summary of the Hit object.

	        The summary lists the query ID and description, the hit ID (with the
	        sequence length when a ``seq_len`` attribute is available) and
	        description, any extra attributes and database cross-references, and
	        finally one table row per HSP.
	        """
	        # NOTE(review): runs of spaces inside the format strings below may
	        # have been collapsed by whatever extracted this file; compare with
	        # the upstream sources before relying on the column alignment.
	        lines = []

	        # set query id line
	        lines.append("Query: %s" % self.query_id)
	        if self.query_description:
	            line = " %s" % self.query_description
	            # keep description lines within 80 characters
	            line = line[:77] + "..." if len(line) > 80 else line
	            lines.append(line)

	        # set hit id line
	        hid_line = " Hit: %s" % self.id
	        try:
	            seq_len = self.seq_len
	        except AttributeError:
	            # sequence length is optional; leave it out when it is unknown
	            pass
	        else:
	            hid_line += " (%i)" % seq_len
	        lines.append(hid_line)
	        if self.description:
	            line = " %s" % self.description
	            line = line[:77] + "..." if len(line) > 80 else line
	            lines.append(line)

	        # set attributes lines
	        for key, value in sorted(self.attributes.items()):
	            lines.append(f" {key}: {value}")

	        # set dbxrefs line
	        if self.dbxrefs:
	            lines.append("Database cross-references: " + ", ".join(self.dbxrefs))

	        # set hsp line and table
	        if not self.hsps:
	            lines.append(" HSPs: ?")
	        else:
	            # horizontal rules, one per table column
	            rules = ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
	            lines.append(" HSPs: %s %s %s %s %s %s" % rules)
	            pattern = "%11s %8s %9s %6s %15s %21s"
	            lines.append(
	                pattern
	                % ("#", "E-value", "Bit score", "Span", "Query range", "Hit range")
	            )
	            lines.append(pattern % rules)
	            for idx, hsp in enumerate(self.hsps):
	                # evalue
	                evalue = getattr_str(hsp, "evalue", fmt="%.2g")
	                # bitscore
	                bitscore = getattr_str(hsp, "bitscore", fmt="%.2f")
	                # alignment length
	                aln_span = getattr_str(hsp, "aln_span")
	                # query region
	                query_start = getattr_str(hsp, "query_start")
	                query_end = getattr_str(hsp, "query_end")
	                query_range = f"[{query_start}:{query_end}]"
	                # the query range column is 15 characters wide; longer
	                # ranges are cut and marked with '~'
	                query_range = (
	                    query_range[:13] + "~]" if len(query_range) > 15 else query_range
	                )
	                # hit region
	                hit_start = getattr_str(hsp, "hit_start")
	                hit_end = getattr_str(hsp, "hit_end")
	                hit_range = f"[{hit_start}:{hit_end}]"
	                # the hit range column is 21 characters wide
	                hit_range = hit_range[:19] + "~]" if len(hit_range) > 21 else hit_range
	                # append the hsp row
	                lines.append(
	                    pattern % (idx, evalue, bitscore, aln_span, query_range, hit_range)
	                )

	        return "\n".join(lines)

	    def __getitem__(self, idx):
	        """Return the HSP object at the given index.

	        An integer index returns the stored HSP. A slice returns a new object
	        of the same class holding the selected HSPs, onto which this object's
	        attributes are transferred via ``_transfer_attrs``.
	        """
	        # if key is slice, return a new Hit instance
	        if isinstance(idx, slice):
	            obj = self.__class__(self.hsps[idx])
	            self._transfer_attrs(obj)
	            return obj
	        return self._items[idx]

	    def __setitem__(self, idx, hsps):
	        """Assign hsps to index idx.

	        :param idx: position(s) to overwrite
	        :type idx: int or slice
	        :param hsps: a single HSP for an integer index, or an iterable of
	            HSP objects for a slice
	        :raises TypeError: if any assigned object is not an HSP
	        :raises ValueError: if an HSP's IDs or descriptions do not match
	            those of this Hit

	        Everything is validated before ``_items`` is modified, so a failed
	        assignment leaves the Hit unchanged.
	        """
	        if isinstance(idx, slice):
	            # Slice assignment takes an iterable of HSPs. An HSP is itself
	            # iterable (over its fragments), so a lone HSP given here is
	            # unpacked and rejected by validation instead of having its
	            # fragments spliced into the item list.
	            hsps = list(hsps)
	            for hsp in hsps:
	                self._validate_hsp(hsp)
	        else:
	            # a single position holds exactly one HSP; lists and tuples are
	            # rejected by the isinstance check in _validate_hsp
	            self._validate_hsp(hsps)

	        self._items[idx] = hsps

	    def __delitem__(self, idx):
	        """Delete item of index idx."""
	        del self._items[idx]

	    # hsp properties #
	    def _validate_hsp(self, hsp):
	        """Validate an HSP object (PRIVATE).

	        Valid HSP objects have the same hit_id as the Hit object ID and the
	        same query_id as the Hit object's query_id. The hit and query
	        descriptions are compared in the same way. Any of these values that
	        is still None on the Hit is adopted from the HSP instead.

	        :raises TypeError: if ``hsp`` is not an HSP instance
	        :raises ValueError: if one of the compared values differs

	        """
	        if not isinstance(hsp, HSP):
	            raise TypeError("Hit objects can only contain HSP objects.")
	        # HACK: to make validation during __init__ work
	        if not self._items:
	            return

	        # (Hit attribute, HSP attribute, label used in the error message)
	        checks = (
	            ("id", "hit_id", "hit ID"),
	            ("description", "hit_description", "hit description"),
	            ("query_id", "query_id", "query ID"),
	            ("query_description", "query_description", "query description"),
	        )
	        for own_attr, hsp_attr, label in checks:
	            expected = getattr(self, own_attr)
	            found = getattr(hsp, hsp_attr)
	            if expected is None:
	                # nothing recorded on the Hit yet: take the HSP's value
	                setattr(self, own_attr, found)
	            elif found != expected:
	                raise ValueError(
	                    "Expected HSP with %s %r, found %r instead."
	                    % (label, expected, found)
	                )

	    # properties #
	    # NOTE(review): optionalcascade is defined in Bio.SearchIO._utils and is
	    # not visible here; presumably it pairs the private attribute on the Hit
	    # with the named attribute on every contained HSP -- confirm there.
	    description = optionalcascade(
	        "_description", "hit_description", """Hit description"""
	    )
	    query_description = optionalcascade(
	        "_query_description",
	        "query_description",
	        """Description of the query that produced the hit""",
	    )
	    id = optionalcascade("_id", "hit_id", """Hit ID string.""")
	    query_id = optionalcascade(
	        "_query_id", "query_id", """ID string of the query that produced the hit"""
	    )
	    # returns all hsps
	    hsps = allitems(doc="""HSP objects contained in the Hit""")

	    @property
	    def id_all(self):
	        """Primary ID of the Hit followed by its alternative ID(s)."""
	        return [self.id] + self._id_alt

	    @property
	    def description_all(self):
	        """Primary description of the Hit followed by its alternative ones."""
	        return [self.description] + self._description_alt

	    @property
	    def fragments(self):
	        """Access the HSPFragment objects contained in the Hit."""
	        # each HSP iterates over its own fragments; flatten them in order
	        return list(chain.from_iterable(self._items))

	    # public methods #
	    def append(self, hsp):
	        """Add a HSP object to the end of Hit.

	        Parameters
	        hsp -- HSP object to append.

	        Any HSP object appended must have the same ``hit_id`` property as the
	        Hit object's ``id`` property and the same ``query_id`` property as the
	        Hit object's ``query_id`` property.

	        """
	        self._validate_hsp(hsp)
	        self._items.append(hsp)

	    def filter(self, func=None):
	        """Create new Hit object whose HSP objects pass the filter function.

	        :param func: function for filtering
	        :type func: callable, accepts HSP, returns bool
	        :returns: a new Hit with the passing HSPs, or None if no HSP passes

	        ``filter`` is analogous to Python's built-in ``filter`` function, except
	        that instead of returning a list it returns a ``Hit`` object. Here is an
	        example of using ``filter`` to select for HSPs having bitscores bigger
	        than 60::

	            >>> from Bio import SearchIO
	            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
	            >>> hit = qresult[3]
	            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
	            >>> filtered_hit = hit.filter(bitscore_filter)
	            >>> len(hit)
	            2
	            >>> len(filtered_hit)
	            1
	            >>> print(filtered_hit)
	            Query: 33211
	            mir_1
	            Hit: gi|301171322|ref|NR_035857.1| (86)
	            Pan troglodytes microRNA mir-520c (MIR520C), microRNA
	            HSPs: ---- -------- --------- ------ --------------- ---------------------
	            # E-value Bit score Span Query range Hit range
	            ---- -------- --------- ------ --------------- ---------------------
	            0 8.9e-20 100.47 60 [1:61] [13:73]

	        """
	        hsps = list(filter(func, self.hsps))
	        if hsps:
	            obj = self.__class__(hsps)
	            self._transfer_attrs(obj)
	            return obj
	        # no HSP passed the filter: there is nothing to build a Hit from
	        return None

	    def index(self, hsp):
	        """Return the index of a given HSP object, zero-based.

	        :param hsp: object to look up
	        :type hsp: HSP

	        """
	        return self._items.index(hsp)

	    def map(self, func=None):
	        """Create new Hit object, mapping the given function to its HSPs.

	        :param func: function for mapping
	        :type func: callable, accepts HSP, returns HSP
	        :returns: a new Hit with the mapped HSPs, or None if this Hit has
	            no HSPs

	        ``map`` is analogous to Python's built-in ``map`` function. It is applied to
	        all HSPs contained in the Hit object and returns a new Hit object.
	        When ``func`` is None the HSPs are carried over unchanged.

	        """
	        if func is not None:
	            hsps = [func(x) for x in self.hsps[:]]  # this creates a shallow copy
	        else:
	            hsps = self.hsps[:]
	        if hsps:
	            obj = self.__class__(hsps)
	            self._transfer_attrs(obj)
	            return obj
	        # nothing to map over
	        return None

	    def pop(self, index=-1):
	        """Remove and return the HSP object at the specified index.

	        :param index: index of HSP object to pop
	        :type index: int

	        """
	        return self._items.pop(index)

	    def sort(self, key=None, reverse=False, in_place=True):
	        """Sort the HSP objects.

	        :param key: sorting function
	        :type key: callable, accepts HSP, returns key for sorting
	        :param reverse: whether to reverse sorting results or no
	        :type reverse: bool
	        :param in_place: whether to do in-place sorting or no
	        :type in_place: bool
	        :returns: None when sorting in place, otherwise the new sorted Hit

	        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
	        method. If you set the ``in_place`` argument to False, it will
	        return a new, sorted Hit object and keep the initial one unsorted.

	        """
	        if in_place:
	            self._items.sort(key=key, reverse=reverse)
	        else:
	            hsps = self.hsps[:]
	            hsps.sort(key=key, reverse=reverse)
	            obj = self.__class__(hsps)
	            self._transfer_attrs(obj)
	            return obj


	# Executed directly as a script rather than imported: run the doctests
	# embedded in this module's docstrings.
	if __name__ == "__main__":
	    from Bio._utils import run_doctest

	    run_doctest()