Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /Restriction /PrintFormat.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 2 years ago

raw

history blame contribute delete

16.3 kB

	#!/usr/bin/env python
	#
	# Restriction Analysis Libraries.
	# Copyright (C) 2004. Frederic Sohm.
	#
	# This code is part of the Biopython distribution and governed by its
	# license. Please see the LICENSE file that should have been included
	# as part of this package.
	#
	r"""Print the results of restriction enzyme analysis.

	PrintFormat prints the results of a restriction analysis in three different
	formats: list, number or map.

	The easiest way to use it is:

	>>> from Bio.Restriction.PrintFormat import PrintFormat
	>>> from Bio.Restriction.Restriction import RestrictionBatch
	>>> from Bio.Seq import Seq
	>>> pBs_mcs = Seq('GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC')
	>>> restriction_batch = RestrictionBatch(['EcoRI', 'BamHI', 'ApaI'])
	>>> result = restriction_batch.search(pBs_mcs)
	>>> my_map = PrintFormat()
	>>> my_map.print_that(result, 'My pBluescript mcs analysis:\n',
	... 'No site:\n')
	My pBluescript mcs analysis:
	ApaI : 12.
	EcoRI : 50.
	No site:
	BamHI
	<BLANKLINE>
	>>> my_map.sequence = pBs_mcs
	>>> my_map.print_as("map")
	>>> my_map.print_that(result)
	           12 ApaI
	           |                                                
	           |                                     50 EcoRI
	           |                                     |          
	GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC
	||||||||||||||||||||||||||||||||||||||||||||||||||||||
	CCATGGCCCGGGGGGGAGCTCCAGCTGCCATAGCTATTCGAACTATAGCTTAAG
	1                                                   54
	<BLANKLINE>
	<BLANKLINE>
	Enzymes which do not cut the sequence.
	<BLANKLINE>
	BamHI
	<BLANKLINE>
	>>>

	Some of the methods of PrintFormat are meant to be overridden by derived
	classes.

	Use the following parameters to control the appearance:

	- ConsoleWidth : width of the console used default to 80.
	should never be less than 60.
	- NameWidth : space attributed to the name in PrintList method.
	- Indent : Indent of the second line.
	- MaxSize : Maximal size of the sequence (default=6, i.e.
	99 999 bp + 1 trailing ',').
	People are unlikely to ask for a restriction map of sequences
	bigger than 100.000 bp. This is needed to determine the
	space to be reserved for the site locations.

	- MaxSize = 5 => 9.999 bp
	- MaxSize = 6 => 99.999 bp
	- MaxSize = 7 => 999.999 bp

	Example output::

	<------------ ConsoleWidth --------------->
	<- NameWidth ->
	EcoRI : 1, 45, 50, 300, 400, 650,
	700, 1200, 2500.
	<-->
	Indent

	""" # noqa: W291


	import re


	class PrintFormat:
	    """PrintFormat allows the printing of results of restriction analysis.

	    The layout is controlled by the class attributes below.

	    - ConsoleWidth: width of the console.
	    - NameWidth: width reserved for an enzyme name in the list output.
	    - MaxSize: room kept free for a site position when wrapping a list.
	    - Indent: extra indentation of wrapped lines.
	    - Cmodulo, PrefWidth, linesize: derived from the values above. They are
	      computed once, when the class body runs, so overriding ConsoleWidth
	      or NameWidth later does not update them automatically.
	    """

	    ConsoleWidth = 80
	    NameWidth = 10
	    MaxSize = 6
	    Cmodulo = ConsoleWidth % NameWidth
	    PrefWidth = ConsoleWidth - Cmodulo
	    Indent = 4
	    linesize = PrefWidth - NameWidth

	    def print_as(self, what="list"):
	        """Print the results as specified.

	        Valid format are:
	            'list'      -> alphabetical order
	            'number'    -> number of sites in the sequence
	            'map'       -> a map representation of the sequence with the sites.

	        Any other value selects the 'list' format.

	        If you want more flexibility over-ride the virtual method make_format.
	        """
	        # The chosen formatter is stored on the instance, where it shadows
	        # the class-level make_format method.
	        if what == "map":
	            self.make_format = self._make_map
	        elif what == "number":
	            self.make_format = self._make_number
	        else:
	            self.make_format = self._make_list

	    def format_output(self, dct, title="", s1=""):
	        """Summarise results as a nicely formatted string.

	        Arguments:
	         - dct is a dictionary as returned by a RestrictionBatch.search()
	         - title is the title of the map.
	           It must be a formatted string, i.e. you must include the line break.
	         - s1 is the title separating the list of enzymes that have sites from
	           those without sites.
	         - s1 must be a formatted string as well.

	        If dct is empty, self.results is used instead. This class does not
	        define that attribute itself, so it has to be provided by a subclass
	        or set on the instance.

	        The format of print_that is a list.
	        """
	        if not dct:
	            dct = self.results
	        # Split the enzymes into cutters (with their sites) and non-cutters.
	        ls, nc = [], []
	        for k, v in dct.items():
	            if v:
	                ls.append((k, v))
	            else:
	                nc.append(k)
	        return self.make_format(ls, title, nc, s1)

	    def print_that(self, dct, title="", s1=""):
	        """Print the output of the format_output method (OBSOLETE).

	        Arguments:
	         - dct is a dictionary as returned by a RestrictionBatch.search()
	         - title is the title of the map.
	           It must be a formatted string, i.e. you must include the line break.
	         - s1 is the title separating the list of enzymes that have sites from
	           those without sites.
	         - s1 must be a formatted string as well.

	        This method prints the output of A.format_output() and it is here
	        for backwards compatibility.
	        """
	        print(self.format_output(dct, title, s1))

	    def make_format(self, cut=(), title="", nc=(), s1=""):
	        """Virtual method used for formatting results.

	        Virtual method.
	        Here to be pointed to one of the _make_* methods.
	        You can as well create a new method and point make_format to it.

	        Unless replaced (e.g. by print_as), the list format is used.
	        """
	        return self._make_list(cut, title, nc, s1)

	    # _make_* methods to be used with the virtual method make_format

	    def _make_list(self, ls, title, nc, s1):
	        """Summarise a list of positions by enzyme (PRIVATE).

	        Return a string of form::

	            title.

	            enzyme1     :   position1, position2.
	            enzyme2     :   position1, position2, position3.

	        Arguments:
	         - ls is a tuple or list of cutting enzymes.
	         - title is the title.
	         - nc is a tuple or list of non cutting enzymes.
	         - s1 is the sentence before the non cutting enzymes.
	        """
	        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

	    def _make_map(self, ls, title, nc, s1):
	        """Summarise mapping information as a string (PRIVATE).

	        Return a string of form::

	            | title.
	            |
	            |     enzyme1, position
	            |     |
	            | AAAAAAAAAAAAAAAAAAAAA...
	            | |||||||||||||||||||||
	            | TTTTTTTTTTTTTTTTTTTTT...

	        Arguments:
	         - ls is a list of cutting enzymes.
	         - title is the title.
	         - nc is a list of non cutting enzymes.
	         - s1 is the sentence before the non cutting enzymes.
	        """
	        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

	    def _make_number(self, ls, title, nc, s1):
	        """Format cutting position information as a string (PRIVATE).

	        Returns a string in the form::

	            title.

	            enzyme which cut 1 time:

	            enzyme1     :   position1.

	            enzyme which cut 2 times:

	            enzyme2     :   position1, position2.
	            ...

	        Arguments:
	         - ls is a list of cutting enzymes.
	         - title is the title.
	         - nc is a list of non cutting enzymes.
	         - s1 is the sentence before the non cutting enzymes.
	        """
	        return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1)

	    def _make_nocut(self, ls, title, nc, s1):
	        """Summarise non-cutting enzymes (PRIVATE).

	        Return a formatted string of the non cutting enzymes.

	        ls is a list of cutting enzymes -> will not be used.
	        Here for compatibility with make_format.

	        Arguments:
	         - title is the title.
	         - nc is a list of non cutting enzymes.
	         - s1 is the sentence before the non cutting enzymes.
	        """
	        return title + self._make_nocut_only(nc, s1)

	    def _make_nocut_only(self, nc, s1, ls=(), title=""):
	        """Summarise non-cutting enzymes (PRIVATE).

	        Return a formatted string of the non cutting enzymes.

	        Arguments:
	         - nc is a tuple or list of non cutting enzymes.
	         - s1 is the sentence before the non cutting enzymes.

	        ls and title are accepted but not used. When nc is empty, s1 is
	        returned unchanged. When s1 is empty, a default heading is used.
	        """
	        if not nc:
	            return s1
	        stringsite = s1 or "\n Enzymes which do not cut the sequence.\n\n"
	        st = ""
	        for key in sorted(nc):
	            st += str(key).ljust(self.NameWidth)
	            # Start a new output line once the current one is full.
	            if len(st) > self.linesize:
	                stringsite += st + "\n"
	                st = ""
	        return stringsite + st + "\n"

	    def _make_list_only(self, ls, title, nc=(), s1=""):
	        """Summarise list of positions per enzyme (PRIVATE).

	        Return a string of form::

	            title.

	            enzyme1     :   position1, position2.
	            enzyme2     :   position1, position2, position3.
	            ...

	        Arguments:
	         - ls is a tuple or list of results.
	         - title is a string.
	         - Non cutting enzymes are not included (nc and s1 are not used).
	        """
	        if not ls:
	            return title
	        return self.__next_section(ls, title)

	    def _make_number_only(self, ls, title, nc=(), s1=""):
	        """Summarise number of cuts as a string (PRIVATE).

	        Return a string of form::

	            title.

	            enzyme which cut 1 time:

	            enzyme1     :   position1.

	            enzyme which cut 2 times:

	            enzyme2     :   position1, position2.
	            ...

	        Arguments:
	         - ls is a list or tuple of results; it is not modified.
	         - title is a string.
	         - Non cutting enzymes are not included (nc and s1 are not used).
	        """
	        if not ls:
	            return title
	        heading = "\n\nenzymes which cut %i times :\n\n"
	        # sorted() copies, so the caller's sequence is left untouched and a
	        # tuple is accepted as well as a list.
	        by_count = sorted(ls, key=lambda item: len(item[1]))
	        # Start from the smallest count actually present, so that no empty
	        # section is emitted when nothing cuts exactly once.
	        cur_len = len(by_count[0][1])
	        section = []
	        for name, sites in by_count:
	            length = len(sites)
	            if length > cur_len:
	                # Flush the finished group before starting the next one.
	                title += heading % cur_len
	                title = self.__next_section(section, title)
	                section, cur_len = [], length
	            section.append((name, sites))
	        title += heading % cur_len
	        return self.__next_section(section, title)

	    @staticmethod
	    def _row_markers(keys, enzymemap, last_key, last_col):
	        """Render the marker lines above one row of the map (PRIVATE).

	        Arguments:
	         - keys is the sorted list of cut positions falling in the row.
	         - enzymemap maps a cut position to the list of enzyme names.
	         - last_key is the position of the last base of the row.
	         - last_col is the 0-based column of that last base in the row.

	        For each position two lines are produced: the position followed by
	        the enzyme names, then a line of bars marking every position
	        rendered so far.
	        """
	        bar = "|"
	        line = " " * 60
	        out = []
	        for key in keys:
	            label = "".join(" " + name for name in enzymemap[key])
	            if key == last_key:
	                # key % 60 may be 0 here, which the generic slicing below
	                # cannot express; the column is supplied by the caller.
	                chunk = line[0:last_col]
	                out.append(chunk + str(key) + label + "\n")
	                out.append(chunk + bar + "\n")
	                break
	            k = key % 60
	            out.append(line[0 : (k - 1)] + str(key) + label + "\n")
	            line = line[0 : (k - 1)] + bar + line[k:]
	            out.append(line[0 : (k - 1)] + bar + line[k:] + "\n")
	        return "".join(out)

	    def _make_map_only(self, ls, title, nc=(), s1=""):
	        """Make string describing cutting map (PRIVATE).

	        Return a string of form::

	            | title.
	            |
	            |     enzyme1, position
	            |     |
	            | AAAAAAAAAAAAAAAAAAAAA...
	            | |||||||||||||||||||||
	            | TTTTTTTTTTTTTTTTTTTTT...

	        Arguments:
	         - ls is a list of results.
	         - title is a string.
	         - Non cutting enzymes are not included (nc and s1 are not used).

	        The sequence is read from self.sequence, which must support len(),
	        str() and complement(inplace=False). It is drawn in rows of 60 bases.
	        """
	        if not ls:
	            return title
	        bar = "|"
	        # Cut position -> names of the enzymes cutting there.
	        enzymemap = {}
	        for enzyme, cut in ls:
	            for c in cut:
	                enzymemap.setdefault(c, []).append(str(enzyme))
	        length = len(self.sequence)
	        sequence = str(self.sequence)
	        revsequence = str(
	            self.sequence.complement(inplace=False)
	        )  # TODO: remove inplace=False
	        # Bucket the sorted positions by the start offset of their row.
	        pending = sorted(enzymemap)
	        cutloc = {}
	        last_start = 0
	        for row_end in range(60, length, 60):
	            cutloc[row_end - 60] = [key for key in pending if key <= row_end]
	            pending = [key for key in pending if key > row_end]
	            last_start = row_end
	        cutloc[last_start] = pending
	        parts = [title or ""]
	        # Complete rows of 60 bases.
	        for base in range(60, length, 60):
	            counter = base - 60
	            parts.append(self._row_markers(cutloc[counter], enzymemap, base, 59))
	            ruler = "".join(
	                (
	                    str(counter + 1).ljust(15),
	                    " " * 30,
	                    str(base).rjust(15),
	                    "\n\n",
	                )
	            )
	            parts.append(
	                "\n".join(
	                    (
	                        sequence[counter:base],
	                        bar * 60,
	                        revsequence[counter:base],
	                        ruler,
	                    )
	                )
	            )
	        # Last (possibly partial) row. A cut on the final base sits at
	        # column length - base - 1 of this row, not at length - 1.
	        base = last_start
	        parts.append(
	            self._row_markers(cutloc[base], enzymemap, length, length - base - 1)
	        )
	        parts.append(sequence[base:length] + "\n")
	        parts.append(bar * (length - base) + "\n")
	        parts.append(revsequence[base:length] + "\n")
	        parts.append(
	            "".join(
	                (
	                    str(base + 1).ljust(15),
	                    " " * (length - base - 30),
	                    str(length).rjust(15),
	                    "\n\n",
	                )
	            )
	        )
	        return "".join(parts)

	    # private method to do lists:

	    def __next_section(self, ls, into):
	        """Next section (PRIVATE).

	        Arguments:
	         - ls is a tuple/list of tuple (string, [int, int]).
	         - into is a string to which the formatted ls will be added.

	        Format ls as a string of lines:
	        The form is::

	            enzyme1     :   position1.
	            enzyme2     :   position2, position3.

	        then add the formatted ls to into and return the result.
	        Entries are emitted in sorted order.
	        """
	        indentation = "\n" + (self.NameWidth + self.Indent) * " "
	        linesize = self.linesize - self.MaxSize
	        # Matches the longest run of at most linesize characters that ends
	        # on a ',' or '.', i.e. one output line of positions.
	        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
	        for name, sites in sorted(ls):
	            output = ", ".join(str(site) for site in sites) + "."
	            if len(output) > linesize:
	                #
	                # cut where appropriate and add the indentation
	                #
	                pieces = [match.group() for match in pat.finditer(output)]
	                stringsite = indentation.join(pieces)
	            else:
	                stringsite = output
	            into = "".join(
	                (into, str(name).ljust(self.NameWidth), " : ", stringsite, "\n")
	            )
	        return into