Spaces:
No application file
No application file
| # Copyright 2004 by Bob Bussell. All rights reserved. | |
| # This code is part of the Biopython distribution and governed by its | |
| # license. Please see the LICENSE file that should have been included | |
| # as part of this package. | |
| """Tools to manipulate data from nmrview .xpk peaklist files.""" | |
# NOTE(review): presumably the number of header lines at the top of an
# nmrview .xpk file (Peaklist reads six header lines before the data), but
# nothing in the visible code references this constant - confirm before use.
HEADERLEN = 6
class XpkEntry:
    """Provide dictionary access to a single entry from an nmrview .xpk file.

    An instance wraps one non-header data line. The line is split on
    whitespace; the first token is kept as the entry number and every
    following token is paired, in order, with the field names found in the
    label line of the peaklist header.

    Parameters
    ----------
    entry : str
        A data line from an nmrview .xpk file.
    headline : str
        The header line that gives the names of the fields.
        This is typically the sixth line of the header, 1-origin.

    Attributes
    ----------
    fields : dict
        Maps each field name from the header line to the matching value
        (a string) of the data line, e.g. ``self.fields["H1.P"]``.
        ``self.fields["entrynum"]`` holds the first token of the data line;
        the key is absent when the data line is empty.

    """

    def __init__(self, entry, headline):
        """Initialize the class."""
        tokens = entry.split()
        labels = headline.split()

        # The first token is the entry number and has no label of its own,
        # so the labelled columns start at the second token. zip() stops at
        # the shorter sequence, so surplus labels or values are dropped.
        self.fields = {}
        for label, value in zip(labels, tokens[1:]):
            self.fields[label] = value

        # An empty line has no entry number to record.
        if tokens:
            self.fields["entrynum"] = tokens[0]
class Peaklist:
    """Provide access to header lines and data from a nmrview xpk file.

    Header file lines and file data are available as attributes. All lines
    are stored without their trailing newline.

    Parameters
    ----------
    infn : str
        The input nmrview filename.

    Attributes
    ----------
    firstline : str
        The first line in the header.
    axislabels : str
        The axis labels.
    dataset : str
        The label of the dataset.
    sw : str
        The sw coordinates.
    sf : str
        The sf coordinates.
    datalabels : str
        The labels of the entries.
    data : list
        File data after header lines.
    dict : dict
        Result of the most recent ``residue_dict`` call (only set once that
        method has been called).

    Examples
    --------
    >>> from Bio.NMR.xpktools import Peaklist
    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
    >>> peaklist.firstline
    'label dataset sw sf '
    >>> peaklist.dataset
    'test.nv'
    >>> peaklist.sf
    '{599.8230 } { 60.7860 } { 60.7860 }'
    >>> peaklist.datalabels
    ' H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat '

    """

    def __init__(self, infn):
        """Initialize the class."""
        with open(infn) as infile:
            # Read in the six header lines, dropping the line terminator.
            # readline() returns "" past the end of file, so a short file
            # simply yields empty header attributes.
            self.firstline = infile.readline().split("\n")[0]
            self.axislabels = infile.readline().split("\n")[0]
            self.dataset = infile.readline().split("\n")[0]
            self.sw = infile.readline().split("\n")[0]
            self.sf = infile.readline().split("\n")[0]
            self.datalabels = infile.readline().split("\n")[0]
            # Everything after the header is data, one entry per line
            self.data = [line.split("\n")[0] for line in infile]

    def residue_dict(self, index):
        """Return a dict of lines in 'data' indexed by residue number or a nucleus.

        The nucleus should be given as the input argument in the same form as
        it appears in the xpk label line (H1, 15N for example)

        Parameters
        ----------
        index : str
            The nucleus to index data by.

        Returns
        -------
        resdict : dict
            Mappings of index nucleus to data line. Each residue number (as
            a string) maps to the list of data lines assigned to it. The
            extra keys ``"maxres"`` and ``"minres"`` hold the largest and
            smallest residue number seen as integers, or -1 for both when
            there are no data lines. The same dictionary is also stored as
            ``self.dict``.

        Raises
        ------
        KeyError
            If the label line has no ``<index>.L`` column.
        ValueError
            If the part of a label before the first ``.`` is not an integer.

        Examples
        --------
        >>> from Bio.NMR.xpktools import Peaklist
        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
        >>> residue_d = peaklist.residue_dict('H1')
        >>> sorted(residue_d.keys())
        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
        >>> residue_d['10']
        ['8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0']

        """
        # None (rather than -1) marks "nothing seen yet" so that a genuine
        # residue number of -1 is not mistaken for the unset state.
        maxres = None
        minres = None
        label_column = index + ".L"

        self.dict = resdict = {}
        for line in self.data:
            # Cast the data line into the XpkEntry class to look up the label
            label = XpkEntry(line, self.datalabels).fields[label_column]
            res = int(label.split(".")[0])
            if maxres is None or res > maxres:
                maxres = res
            if minres is None or res < minres:
                minres = res
            # Keys are normalised through int() so "03" and "3" coincide;
            # lines sharing a residue accumulate in one list.
            resdict.setdefault(str(res), []).append(line)

        # Keep the historical -1 placeholder for a peaklist without data
        resdict["maxres"] = -1 if maxres is None else maxres
        resdict["minres"] = -1 if minres is None else minres
        return resdict

    def write_header(self, outfn):
        """Write header lines from input file to the file named ``outfn``.

        The file is opened for writing (truncating any existing content) and
        receives the six header lines, each followed by a newline.
        """
        header_lines = (
            self.firstline,
            self.axislabels,
            self.dataset,
            self.sw,
            self.sf,
            self.datalabels,
        )
        with open(outfn, "w") as outfile:
            outfile.writelines(line + "\n" for line in header_lines)
def replace_entry(line, fieldn, newentry):
    """Replace an entry in a string by the field number.

    The field is located with ``_find_start_entry`` (fields are counted
    starting with 1) and the whitespace-delimited token found there is
    swapped for ``str(newentry)``.

    No padding is implemented currently. Spacing will change if
    the original field entry and the new field entry are of
    different lengths.
    """
    # This method depends on xpktools._find_start_entry
    begin = _find_start_entry(line, fieldn)
    # Width of the token currently occupying the field
    old_token = line[begin:].split(None, 1)[0]
    end = begin + len(old_token)
    return "".join((line[:begin], str(newentry), line[end:]))
| def _find_start_entry(line, n): | |
| """Find the starting character for entry ``n`` in a space delimited ``line`` (PRIVATE). | |
| n is counted starting with 1. | |
| The n=1 field by definition begins at the first character. | |
| Returns | |
| ------- | |
| starting character : str | |
| The starting character for entry ``n``. | |
| """ | |
| # This function is used by replace_entry | |
| if n == 1: | |
| return 0 # Special case | |
| # Count the number of fields by counting spaces | |
| c = 1 | |
| leng = len(line) | |
| # Initialize variables according to whether the first character | |
| # is a space or a character | |
| if line[0] == " ": | |
| infield = False | |
| field = 0 | |
| else: | |
| infield = True | |
| field = 1 | |
| while c < leng and field < n: | |
| if infield: | |
| if line[c] == " " and line[c - 1] != " ": | |
| infield = False | |
| else: | |
| if line[c] != " ": | |
| infield = True | |
| field += 1 | |
| c += 1 | |
| return c - 1 | |
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    Every file is indexed by residue number through ``keyatom``. The table
    has one row per residue number, from the smallest to the largest found
    in any file, and one column per input file.

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    datalabel : str
        The data element reported.
    keyatom : str
        The name of the nucleus used as an index for the data table.

    Returns
    -------
    outlist : list
        List of table rows indexed by ``keyatom``. Each row is a string
        holding the residue number followed, for every file in ``fn_list``
        order, by a tab and the ``datalabel`` value of the first data line
        for that residue, or ``*`` when the file has no such residue. Rows
        end with a newline. The list is empty when ``fn_list`` is empty or
        none of the files contains a data line.

    """
    # TODO - Clarify this docstring, add an example?
    if not fn_list:
        # Nothing to read, hence nothing to tabulate
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # A file without data lines only carries the bookkeeping keys, with -1
    # as a placeholder; leave those out so the placeholder cannot stretch
    # the residue range with rows that belong to no file.
    bookkeeping = ("maxres", "minres")
    populated = [
        dictionary
        for dictionary in dict_list
        if any(key not in bookkeeping for key in dictionary)
    ]
    if not populated:
        return []

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in populated)
    maxr = max(dictionary["maxres"] for dictionary in populated)

    outlist = []
    for res in range(minr, maxr + 1):  # s.t. res numbers
        key = str(res)
        cells = [key]
        for dictionary, label in zip(dict_list, label_line_list):  # s.t. dictionaries
            if key in dictionary:
                # Only the first data line of a residue is reported
                cells.append(XpkEntry(dictionary[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")
    return outlist
| def _read_dicts(fn_list, keyatom): | |
| """Read multiple files into a list of residue dictionaries (PRIVATE).""" | |
| dict_list = [] | |
| datalabel_list = [] | |
| for fn in fn_list: | |
| peaklist = Peaklist(fn) | |
| dictionary = peaklist.residue_dict(keyatom) | |
| dict_list.append(dictionary) | |
| datalabel_list.append(peaklist.datalabels) | |
| return [dict_list, datalabel_list] | |
# Script entry point: only runs when the module is executed directly.
if __name__ == "__main__":
    # NOTE(review): presumably runs the docstring examples of this module
    # through Biopython's helper - confirm against Bio._utils.run_doctest.
    from Bio._utils import run_doctest

    run_doctest()