Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /motifs /minimal.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 2 years ago

raw

history blame contribute delete

5.86 kB

	# Copyright 2018 by Ariel Aptekmann.
	# All rights reserved.
	#
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Module for the support of MEME minimal motif format."""

	from Bio import motifs


	def read(handle):
	    """Parse a minimal-format MEME file into a Record of motifs.

	    The version, alphabet and background sections are read from the start of
	    the stream, in that order. After that, every line starting with "MOTIF"
	    opens a motif whose statistics and letter-probability matrix are read and
	    appended to the record as a motif object.

	    Examples
	    --------
	    >>> from Bio.motifs import minimal
	    >>> with open("motifs/meme.out") as f:
	    ...     record = minimal.read(f)
	    ...
	    >>> for motif in record:
	    ...     print(motif.name, motif.evalue)
	    ...
	    1 1.1e-22

	    You can access individual motifs in the record by their index or find a motif
	    by its name:

	    >>> from Bio import motifs
	    >>> with open("motifs/minimal_test.meme") as f:
	    ...     record = motifs.parse(f, 'minimal')
	    ...
	    >>> motif = record[0]
	    >>> print(motif.name)
	    KRP
	    >>> motif = record['IFXA']
	    >>> print(motif.name)
	    IFXA

	    This function won't retrieve instances, as there are none in minimal meme format.

	    """
	    record = Record()
	    # The header readers each advance the shared stream, so order matters.
	    _read_version(record, handle)
	    _read_alphabet(record, handle)
	    _read_background(record, handle)

	    motifs_seen = 0
	    while True:
	        # Advance to the next motif header; None means the stream is exhausted.
	        header = next((text for text in handle if text.startswith("MOTIF")), None)
	        if header is None:
	            return record
	        name = header.split()[1]
	        motifs_seen += 1
	        length, num_occurrences, evalue = _read_motif_statistics(handle)
	        counts = _read_lpm(handle, num_occurrences)
	        motif = motifs.Motif(alphabet=record.alphabet, counts=counts)
	        motif.background = record.background
	        motif.length = length
	        motif.num_occurrences = num_occurrences
	        motif.evalue = evalue
	        motif.name = name
	        record.append(motif)
	        # Sanity check: exactly one motif stored per header encountered.
	        assert len(record) == motifs_seen


	class Record(list):
	    """List of motifs from a minimal MEME file, plus its header metadata.

	    Items can be fetched by integer index or slice like any list, and also
	    by motif name when the key is a string.
	    """

	    def __init__(self):
	        """Create an empty record with blank header fields."""
	        self.version = ""
	        self.datafile = ""
	        self.command = ""
	        self.alphabet = None
	        self.background = {}
	        self.sequences = []

	    def __getitem__(self, key):
	        """Return the motif at index key, or the motif named key.

	        A string key is matched against each motif's ``name``; the first
	        match is returned, and None is returned when nothing matches.
	        """
	        if not isinstance(key, str):
	            return super().__getitem__(key)
	        return next((motif for motif in self if motif.name == key), None)


	# Everything below is private


	def _read_background(record, handle):
	    """Read background letter frequencies (PRIVATE).

	    Skips lines of ``handle`` up to the one starting with
	    "Background letter frequencies", then parses the line right after it.
	    That line is expected to hold alternating letter/frequency fields such as
	    ``A 0.25 C 0.25 G 0.25 T 0.25``; the four frequencies are stored in
	    ``record.background`` under the keys "A", "C", "G" and "T".

	    Raises ValueError if the header line is missing, if the stream ends right
	    after it, or if the frequency line has too few fields or a non-numeric
	    frequency.
	    """
	    for line in handle:
	        if line.startswith("Background letter frequencies"):
	            break
	    else:
	        raise ValueError(
	            "Improper input file. File should contain a line starting background frequencies."
	        )
	    try:
	        line = next(handle)
	    except StopIteration:
	        raise ValueError(
	            "Unexpected end of stream: Expected to find line starting background frequencies."
	        ) from None
	    fields = line.split()
	    try:
	        # Frequencies sit at the odd positions; the letters at the even
	        # positions are not inspected.
	        freq_a, freq_c, freq_g, freq_t = [float(fields[i]) for i in (1, 3, 5, 7)]
	    except IndexError:
	        # Report a truncated line the same way as other malformed input.
	        raise ValueError(
	            "Improper input file. Background frequencies line is malformed:\n%s"
	            % line
	        ) from None
	    record.background = {"A": freq_a, "C": freq_c, "G": freq_g, "T": freq_t}


	def _read_version(record, handle):
	    """Store the MEME version found in handle on record (PRIVATE).

	    Lines are consumed up to and including the first one that starts with
	    "MEME version"; its third whitespace-separated field becomes
	    ``record.version``. Raises ValueError if no such line is found.
	    """
	    version_line = next(
	        (text for text in handle if text.startswith("MEME version")), None
	    )
	    if version_line is None:
	        raise ValueError(
	            "Improper input file. File should contain a line starting MEME version."
	        )
	    record.version = version_line.split()[2]


	def _read_alphabet(record, handle):
	    """Determine the alphabet declared in handle and store it on record (PRIVATE).

	    Lines are consumed up to and including the first one starting with
	    "ALPHABET". That line must begin with "ALPHABET= "; if the remainder is
	    exactly "ACGT" the DNA alphabet is stored, otherwise the 20-letter
	    protein alphabet is stored in ``record.alphabet``.

	    Raises ValueError if no such line exists or it lacks the "ALPHABET= "
	    prefix.
	    """
	    header = next((text for text in handle if text.startswith("ALPHABET")), None)
	    if header is None:
	        raise ValueError(
	            "Unexpected end of stream: Expected to find line starting with 'ALPHABET'"
	        )
	    if not header.startswith("ALPHABET= "):
	        raise ValueError("Line does not start with 'ALPHABET':\n%s" % header)
	    letters = header.strip().replace("ALPHABET= ", "")
	    record.alphabet = "ACGT" if letters == "ACGT" else "ACDEFGHIKLMNPQRSTVWY"


	def _read_lpm(handle, num_occurrences):
	    """Read a letter probability matrix and convert it to counts (PRIVATE).

	    Rows are taken from handle until the stream ends or a line without
	    exactly four whitespace-separated fields is met (that line is consumed).
	    Each probability is multiplied by num_occurrences and rounded. The four
	    columns are returned as lists in a dict keyed "A", "C", "G", "T", in
	    that column order.
	    """
	    rows = []
	    for text in handle:
	        fields = text.split()
	        if len(fields) != 4:
	            break
	        rows.append([round(float(value) * num_occurrences) for value in fields])
	    # Transpose the row-wise matrix into one list per letter.
	    return {
	        letter: [row[column] for row in rows]
	        for column, letter in enumerate("ACGT")
	    }


	def _read_motif_statistics(handle):
	    """Read motif statistics (PRIVATE).

	    Skips to the next line of handle starting with
	    "letter-probability matrix:" and extracts the values that follow
	    ``w=``, ``nsites=`` and ``E=``, for example::

	        letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009

	    Returns a ``(length, num_occurrences, evalue)`` tuple of int, int and
	    float.

	    Raises ValueError if the stream ends before such a line is found.
	    """
	    for line in handle:
	        if line.startswith("letter-probability matrix:"):
	            break
	    else:
	        # Without this guard the parsing below would run on whichever line
	        # happened to come last, or on an unbound name for an empty stream.
	        raise ValueError(
	            "Unexpected end of stream: Expected to find line starting with "
	            "'letter-probability matrix:'"
	        )
	    num_occurrences = int(line.split("nsites=")[1].split()[0])
	    length = int(line.split("w=")[1].split()[0])
	    evalue = float(line.split("E=")[1].split()[0])
	    return length, num_occurrences, evalue


	def _read_motif_name(handle):
	    """Return the motif name from a "sorted by position p-value" line (PRIVATE).

	    Lines are consumed up to and including the first one containing
	    "sorted by position p-value"; the name is that line's first two
	    whitespace-separated words joined by a single space. Raises ValueError
	    if the stream ends without such a line.
	    """
	    title = next(
	        (text for text in handle if "sorted by position p-value" in text), None
	    )
	    if title is None:
	        raise ValueError("Unexpected end of stream: Failed to find motif name")
	    return " ".join(title.split()[:2])


	if __name__ == "__main__":
	    # Executed only when this file is run as a script: import Biopython's
	    # run_doctest helper lazily and call it.
	    from Bio._utils import run_doctest

	    run_doctest()