Spaces:
No application file
No application file
# Copyright 2018 by Ariel Aptekmann.
# All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Module for the support of MEME minimal motif format."""
| from Bio import motifs | |
def read(handle):
    """Parse a file in MEME minimal motif format into a Record object.

    The header is read first (MEME version, alphabet, background letter
    frequencies, in that order), then every section introduced by a line
    starting with "MOTIF" is turned into a motif and appended to the record.
    Each motif gets the record's background plus the ``length``,
    ``num_occurrences``, ``evalue`` and ``name`` parsed from its section.

    Arguments:
     - handle - an iterator over the text lines of the file (for example
       an open file object); it is consumed by this function.

    Returns the populated Record (a list of motifs).

    Raises ValueError if a header line is missing or if a "MOTIF" line does
    not carry a motif name.

    Examples
    --------
    >>> from Bio.motifs import minimal
    >>> with open("motifs/meme.out") as f:
    ...     record = minimal.read(f)
    ...
    >>> for motif in record:
    ...     print(motif.name, motif.evalue)
    ...
    1 1.1e-22

    You can access individual motifs in the record by their index or find a motif
    by its name:

    >>> from Bio import motifs
    >>> with open("motifs/minimal_test.meme") as f:
    ...     record = motifs.parse(f, 'minimal')
    ...
    >>> motif = record[0]
    >>> print(motif.name)
    KRP
    >>> motif = record['IFXA']
    >>> print(motif.name)
    IFXA

    This function won't retrieve instances, as there are none in minimal meme format.

    """
    record = Record()
    _read_version(record, handle)
    _read_alphabet(record, handle)
    _read_background(record, handle)

    while True:
        # Skip ahead to the next motif header. Running out of input here is
        # the normal way to finish, so the for/else returns the record.
        for line in handle:
            if line.startswith("MOTIF"):
                break
        else:
            return record

        words = line.split()
        if len(words) < 2:
            # Report malformed input the same way the header parsers do
            # instead of failing with a bare IndexError.
            raise ValueError(
                "Improper input file. 'MOTIF' line does not contain a motif name:\n%s"
                % line
            )
        name = words[1]

        length, num_occurrences, evalue = _read_motif_statistics(handle)
        counts = _read_lpm(handle, num_occurrences)
        # counts maps each letter to a list of per-position counts,
        # e.g. {'A': [...], 'C': [...], 'G': [...], 'T': [...]}
        motif = motifs.Motif(alphabet=record.alphabet, counts=counts)
        motif.background = record.background
        motif.length = length
        motif.num_occurrences = num_occurrences
        motif.evalue = evalue
        motif.name = name
        record.append(motif)
class Record(list):
    """List of motifs parsed from a minimal MEME file, plus header data.

    Besides the usual list behaviour, a motif can be looked up by name by
    indexing the record with a string.
    """

    def __init__(self):
        """Set every header attribute to its empty default."""
        self.version = self.datafile = self.command = ""
        self.alphabet = None
        self.background = {}
        self.sequences = []

    def __getitem__(self, key):
        """Return the motif at position ``key``, or named ``key`` if it is a string.

        A string key yields the first motif whose ``name`` equals it, or
        None when no motif has that name. Any other key is handled by
        plain list indexing.
        """
        if not isinstance(key, str):
            return super().__getitem__(key)
        return next((entry for entry in self if entry.name == key), None)
# Everything below is private
| def _read_background(record, handle): | |
| """Read background letter frequencies (PRIVATE).""" | |
| for line in handle: | |
| if line.startswith("Background letter frequencies"): | |
| break | |
| else: | |
| raise ValueError( | |
| "Improper input file. File should contain a line starting background frequencies." | |
| ) | |
| try: | |
| line = next(handle) | |
| except StopIteration: | |
| raise ValueError( | |
| "Unexpected end of stream: Expected to find line starting background frequencies." | |
| ) | |
| line = line.strip() | |
| ls = line.split() | |
| A, C, G, T = float(ls[1]), float(ls[3]), float(ls[5]), float(ls[7]) | |
| record.background = {"A": A, "C": C, "G": G, "T": T} | |
| def _read_version(record, handle): | |
| """Read MEME version (PRIVATE).""" | |
| for line in handle: | |
| if line.startswith("MEME version"): | |
| break | |
| else: | |
| raise ValueError( | |
| "Improper input file. File should contain a line starting MEME version." | |
| ) | |
| line = line.strip() | |
| ls = line.split() | |
| record.version = ls[2] | |
| def _read_alphabet(record, handle): | |
| """Read alphabet (PRIVATE).""" | |
| for line in handle: | |
| if line.startswith("ALPHABET"): | |
| break | |
| else: | |
| raise ValueError( | |
| "Unexpected end of stream: Expected to find line starting with 'ALPHABET'" | |
| ) | |
| if not line.startswith("ALPHABET= "): | |
| raise ValueError("Line does not start with 'ALPHABET':\n%s" % line) | |
| line = line.strip().replace("ALPHABET= ", "") | |
| if line == "ACGT": | |
| al = "ACGT" | |
| else: | |
| al = "ACDEFGHIKLMNPQRSTVWY" | |
| record.alphabet = al | |
| def _read_lpm(handle, num_occurrences): | |
| """Read letter probability matrix (PRIVATE).""" | |
| counts = [[], [], [], []] | |
| for line in handle: | |
| freqs = line.split() | |
| if len(freqs) != 4: | |
| break | |
| counts[0].append(round(float(freqs[0]) * num_occurrences)) | |
| counts[1].append(round(float(freqs[1]) * num_occurrences)) | |
| counts[2].append(round(float(freqs[2]) * num_occurrences)) | |
| counts[3].append(round(float(freqs[3]) * num_occurrences)) | |
| c = {} | |
| c["A"] = counts[0] | |
| c["C"] = counts[1] | |
| c["G"] = counts[2] | |
| c["T"] = counts[3] | |
| return c | |
| def _read_motif_statistics(handle): | |
| """Read motif statistics (PRIVATE).""" | |
| # minimal : | |
| # letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009 | |
| for line in handle: | |
| if line.startswith("letter-probability matrix:"): | |
| break | |
| num_occurrences = int(line.split("nsites=")[1].split()[0]) | |
| length = int(line.split("w=")[1].split()[0]) | |
| evalue = float(line.split("E=")[1].split()[0]) | |
| return length, num_occurrences, evalue | |
| def _read_motif_name(handle): | |
| """Read motif name (PRIVATE).""" | |
| for line in handle: | |
| if "sorted by position p-value" in line: | |
| break | |
| else: | |
| raise ValueError("Unexpected end of stream: Failed to find motif name") | |
| line = line.strip() | |
| words = line.split() | |
| name = " ".join(words[0:2]) | |
| return name | |
if __name__ == "__main__":
    # Only when executed as a script: hand over to Biopython's doctest
    # helper (presumably runs the doctests in this module -- confirm
    # against Bio._utils).
    from Bio._utils import run_doctest

    run_doctest()