| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import shlex |
| | from dataclasses import dataclass |
| |
|
| |
|
@dataclass
class PeekedLine:
    """Mutable result slot filled in by ``peek_line``.

    ``peek_line`` overwrites both fields on every call; ``advance`` later
    seeks the stream to ``next_position``.
    """

    # Text of the line that was read, without its trailing newline.
    line: str
    # Stream offset recorded by ``peek_line`` for a later ``advance`` call.
    next_position: int
| |
|
| |
|
def peek_line(f, peeked: PeekedLine, rewind=True):
    """Read one line from ``f`` and store it in ``peeked``.

    Args:
        f: Seekable stream supporting ``tell``, ``readline`` and ``seek``.
        peeked: Record that receives the line (trailing newline removed)
            and an offset for a later ``advance`` call.
        rewind: When true, the stream is put back where it was before the
            read and ``peeked.next_position`` is the offset *after* the
            line. When false, the stream stays after the line and
            ``peeked.next_position`` is the offset *before* it, so
            ``advance`` un-reads the line.

    Returns:
        False when ``readline`` returned an empty string (end of input),
        True otherwise.
    """
    start = f.tell()
    raw = f.readline()
    has_line = raw != ""

    # Drop exactly one trailing newline; a final line without one is kept.
    if has_line and raw[-1] == "\n":
        raw = raw[:-1]
    peeked.line = raw

    if not rewind:
        peeked.next_position = start
        return has_line

    peeked.next_position = f.tell()
    f.seek(start)
    return has_line
| |
|
| |
|
def advance(f, peeked: PeekedLine):
    """Seek ``f`` to the offset stored in ``peeked.next_position``."""
    target = peeked.next_position
    f.seek(target)
| |
|
| |
|
def star_item_parse(line: str):
    """Split a ``category.name [value]`` STAR item line into its parts.

    Only the first ``.`` separates the category from the item name, so a
    value that itself contains a dot (for example ``10.5``) is returned
    intact.

    Args:
        line: Item line such as ``"_cell.length_a 10.5"``.

    Returns:
        A ``(category, name, value)`` tuple. ``value`` is the first
        whitespace-delimited token after the name, or ``""`` when the line
        carries no value.

    Raises:
        Exception: If the line has no ``.`` or no item name after it.
    """
    cat, dot, rest = line.partition(".")
    if not dot:
        raise Exception(f"expected at least two parts in the STAR data line {line}")

    tokens = rest.split()
    if not tokens:
        # Previously surfaced as a bare IndexError with no context.
        raise Exception(f"expected an item name in the STAR data line {line}")

    name = tokens[0]
    val = tokens[1] if len(tokens) >= 2 else ""
    return (cat, name, val)
| |
|
| |
|
def star_read_data(f, names: list, in_loop: bool, cols=False, has_blocks=True):
    """Read the requested items of one STAR category from ``f``.

    Args:
        f: Seekable text stream positioned at the first ``_category.item``
            line of the category (presumably just after a ``loop_`` line
            when ``in_loop`` is true -- the header scan below stops at the
            first line that does not start with ``_``).
        names: Item names (the part after the ``.``) to extract.
        in_loop: True to read a looped table (header lines followed by
            rows); False to read ``_category.item value`` pairs.
        cols: When true the result is column-major (one list per entry of
            ``names``); otherwise row-major.
        has_blocks: Forwarded to ``star_read_data_row`` to select the
            tokenizer.

    Returns:
        Row-major: a list of rows, each aligned with ``names`` and holding
        ``""`` for names not present in the data (exactly one row when
        ``in_loop`` is false). Column-major: one list of values per name.

    Raises:
        Exception: If a loop header line does not have exactly one ``.``,
            or on any error raised by the row reader / item parser.
    """
    peeked = PeekedLine("", 0)
    tab = [[] for _ in names] if cols else []

    if in_loop:
        # Collect the loop's column headers (item names without category).
        heads = []
        while peek_line(f, peeked):
            if not peeked.line.startswith("_"):
                break
            parts = peeked.line.split(".")
            if len(parts) != 2:
                raise Exception(
                    f"expected two parts in the STAR data line {peeked.line}"
                )
            heads.append(parts[1].strip())
            advance(f, peeked)

        if not heads:
            # A zero-width row is always "complete" without consuming any
            # input, so reading rows here would never terminate.
            return tab

        # Map each requested name to its first matching column, -1 if absent.
        first_column = {}
        for column, head in enumerate(heads):
            first_column.setdefault(head, column)
        indices = [first_column.get(name, -1) for name in names]

        row = [None] * len(heads)
        while star_read_data_row(f, row, in_loop, has_blocks):
            values = [row[index] if index >= 0 else "" for index in indices]
            if cols:
                for column_values, value in zip(tab, values):
                    column_values.append(value)
            else:
                tab.append(values)
        return tab

    if not cols:
        tab = [[""] * len(names)]

    # First position of each requested name, mirroring ``names.index``.
    name_slot = {}
    for slot, wanted in enumerate(names):
        name_slot.setdefault(wanted, slot)

    category = ""
    row = ["", ""]  # [tag, value]
    while star_read_data_row(f, row, in_loop, has_blocks, peeked):
        cat, name, _ = star_item_parse(row[0])
        if category == "":
            category = cat
        elif category != cat:
            # The row reader peeks without rewinding, so ``peeked`` holds the
            # offset of the start of the last line it read; seeking there
            # un-reads that line for the next category's reader.
            # NOTE(review): only the last line is un-read -- confirm that a
            # tag and its value never span several lines at this point.
            advance(f, peeked)
            break

        slot = name_slot.get(name)
        if slot is None:
            continue
        if cols:
            tab[slot].append(row[1])
        else:
            tab[0][slot] = row[1]

    return tab
| |
|
| |
|
def star_read_data_row(
    f, row: list, in_loop: bool, has_blocks: bool, peeked: PeekedLine = None
):
    """Fill ``row`` in place with the next ``len(row)`` tokens read from ``f``.

    Tokens may be spread over several lines. A line starting with ``;``
    opens a text field that runs until the next line starting with ``;``;
    lines starting with ``#`` are skipped.

    Args:
        f: Seekable text stream.
        row: Pre-sized list; every slot is overwritten on success.
        in_loop: When true, a line starting with ``_`` also ends the data.
        has_blocks: When true lines are tokenized with ``shlex.split``
            (quote-aware); otherwise with plain ``str.split``.
        peeked: Optional record to reuse, letting the caller see the last
            line read; a private one is created when omitted.

    Returns:
        True when the row was filled. False when, before any token was
        read, the input ended or a ``loop_`` / ``data_`` / (in a loop)
        ``_`` line was met; in the latter case that line is un-read.

    Raises:
        Exception: If the input or data block ends part-way through a row,
            or a line supplies more tokens than the row has room for.
    """
    if peeked is None:
        peeked = PeekedLine("", 0)

    wanted = len(row)
    filled = 0
    while filled < wanted:
        if not peek_line(f, peeked, rewind=False):
            if peeked.line == "" and filled == 0:
                return False
            raise Exception(f"read {filled} tokens when {wanted} were requested: {row}")

        current = peeked.line
        at_boundary = current.startswith(("loop_", "data_")) or (
            in_loop and current.startswith("_")
        )
        if at_boundary:
            if filled == 0:
                # With rewind=False the stored offset is the start of this
                # line, so this puts the boundary line back on the stream.
                advance(f, peeked)
                return False
            raise Exception(
                f"data block ended while reading requested number of tokens: {wanted}"
            )

        if current.startswith(";"):
            # Text field: accumulate raw lines until the closing ';' line.
            row[filled] = current[1:]
            while peek_line(f, peeked, rewind=False):
                if peeked.line.startswith(";"):
                    break
                row[filled] += peeked.line
            filled += 1
            continue

        if current.startswith("#"):
            continue

        stripped = current.strip()
        elems = shlex.split(stripped) if has_blocks else stripped.split()
        if filled + len(elems) > wanted:
            raise Exception(
                f"too many elements when trying to read {wanted} tokens; last read: {elems}, row was: {row}, i = {filled}"
            )
        for elem in elems:
            row[filled] = elem
            filled += 1

    return True
| |
|
| |
|
def star_string_escape(text):
    """Quote ``text`` so it can be written as a single STAR value.

    Rules applied, in order:
      * no quote characters: returned bare, unless it is empty, contains a
        space or starts with ``_``, in which case it is single-quoted;
      * contains ``"`` but no ``'``: single-quoted;
      * contains ``'`` but no ``"``: double-quoted;
      * contains both: emitted as a ``;``-delimited text field.

    Args:
        text: The raw string value.

    Returns:
        The escaped representation of ``text``.
    """
    needs_quoting = text == "" or " " in text or text[0] == "_"
    has_single = "'" in text
    has_double = '"' in text

    if has_single and has_double:
        # Neither quote style is safe. The original concatenated the builtin
        # ``str`` here instead of ``text`` and raised TypeError.
        return "\n;" + text + "\n;"
    if has_single:
        return '"' + text + '"'
    if has_double or needs_quoting:
        return f"'{text}'"
    return text
| |
|
| |
|
def star_loop_header_write(f, category, names):
    """Write a ``loop_`` line followed by one ``category.name`` line per name.

    Each header line ends with a space before the newline.
    """
    f.write("loop_\n")
    for item in names:
        header_line = "{}.{} \n".format(category, item)
        f.write(header_line)
| |
|
| |
|
def star_value_defined(val):
    """Return True unless ``val`` is one of the placeholders ``.`` or ``?``."""
    return val not in (".", "?")
| |
|
| |
|
def star_value(val, default):
    """Return ``val`` when ``star_value_defined`` accepts it, else ``default``."""
    return val if star_value_defined(val) else default
| |
|
| |
|
def atom_site_token(value):
    """Return ``"."`` for a single-space value; any other value is returned as is."""
    if value == " ":
        return "."
    return value
| |
|