| |
| import argparse |
| import sys |
|
|
# 0-based indices of the token-line columns that hold the "1" flags consumed
# by --is_member and --is_parenthesis_root respectively.
IS_MEMBER, IS_PARENTHESIS_ROOT = 5, 3

# Suffix appended to a flagged member's relation (column 7), keyed by the
# relation of the Apos/Coord node found by walking up the tree.
MEMBER_SUFFIXES = {"Apos": "_Ap", "Coord": "_Co"}


def annotate_sentence(block, is_member=False, is_parenthesis_root=False):
    """Fold the is_member / is_parenthesis_root flags of one sentence into column 7.

    Only lines that split into exactly 10 tab-separated columns are treated as
    token lines; every other line (e.g. comments) is left untouched.

    Args:
        block: List of the sentence's lines without trailing newlines. It is
            modified in place.
        is_member: When true, a token whose IS_MEMBER column is "1" gets "_Ap"
            or "_Co" appended to column 7, depending on whether the nearest
            node on its head chain has relation "Apos" or "Coord"; the flag
            column is then reset to "_".
        is_parenthesis_root: When true, a token whose IS_PARENTHESIS_ROOT
            column is "1" gets "_Pa" appended to column 7 and the flag column
            is reset to "_".

    Returns:
        The same ``block`` list, for convenience.

    Raises:
        AssertionError: If token ids are not consecutive integers from 1.
    """
    # First pass: record head (column 6) and relation (column 7) per token id,
    # using the original, unsuffixed relations.
    heads, deps = {}, {}
    for entry in block:
        columns = entry.split("\t")
        if len(columns) == 10:
            index = len(heads) + 1
            # Ids must be 1..n in order, since they are used as dict keys below.
            assert int(columns[0]) == index
            deps[index] = columns[7]
            heads[index] = columns[6]

    # Second pass: rewrite the flagged token lines.
    for i, entry in enumerate(block):
        columns = entry.split("\t")
        if len(columns) != 10:
            continue

        if is_member and columns[IS_MEMBER] == "1":
            # Walk up the head chain until an Apos/Coord node or the root (0).
            # NOTE(review): the walk starts at the token itself, so a flagged
            # token that is itself Coord/Apos matches itself -- confirm this is
            # intended rather than starting from its head.
            parent = int(columns[0])
            while parent and deps[parent] not in MEMBER_SUFFIXES:
                parent = int(heads[parent])

            # parent == 0 means the root was reached without a match; `deps`
            # has no key 0, so use .get() to reach the warning below instead
            # of raising KeyError.
            suffix = MEMBER_SUFFIXES.get(deps.get(parent))
            if suffix is not None:
                columns[7] += suffix
            else:
                print("Did not find correct parent of IsMember {} in the below sentence".format(block[i]), *block, file=sys.stderr, sep="\n")
            columns[IS_MEMBER] = "_"

        if is_parenthesis_root and columns[IS_PARENTHESIS_ROOT] == "1":
            columns[7] += "_Pa"
            columns[IS_PARENTHESIS_ROOT] = "_"

        block[i] = "\t".join(columns)

    return block


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("paths", default=[], nargs="*", help="Input paths")
    parser.add_argument("--is_member", default=False, action="store_true", help="Add is_member")
    parser.add_argument("--is_parenthesis_root", default=False, action="store_true", help="Add is_parenthesis_root")
    args = parser.parse_args()

    for path in args.paths:
        with open(path, "r", encoding="utf-8") as conllu_file:
            block = []
            for line in conllu_file:
                line = line.rstrip("\n")
                if line:
                    block.append(line)
                    continue

                # An empty line terminates a sentence; it must not be empty.
                assert block
                annotate_sentence(block, args.is_member, args.is_parenthesis_root)
                print(*block, sep="\n", end="\n\n")
                block = []
            # The file must end with an empty line closing the last sentence.
            assert not block
|
|