def fix_empty_fields(input_file, output_file):
    """Copy a tab-separated CoNLL-U file, replacing empty fields with "_".

    Blank (whitespace-only) lines and lines starting with "#" are copied
    unchanged. Lines with fewer than 10 tab-separated fields are also copied
    unchanged. On every other line, each field that is an empty string is
    replaced by "_" and the line is written back terminated by a newline.

    Args:
        input_file: Path of the file to read (UTF-8).
        output_file: Path of the file to write (UTF-8); overwritten if it
            already exists.
    """
    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'w', encoding='utf-8') as f_out:
        for line in f_in:
            if not line.strip() or line.startswith('#'):
                f_out.write(line)
                continue

            # Remove only the line terminator. A plain strip() would also eat
            # trailing tabs, so an empty last column would vanish before the
            # split and the line would never reach the 10-field check.
            fields = line.rstrip('\n').split('\t')
            if len(fields) >= 10:
                # some ufeats in punctuation are an empty string instead of
                # "_", stanza cannot deal with this
                fields = [field if field != "" else "_" for field in fields]
                line = '\t'.join(fields) + '\n'
            f_out.write(line)

    print(f"Fixed {input_file} → {output_file}")


# Run the conversions only when executed as a script, so importing this
# module does not touch the filesystem.
if __name__ == "__main__":
    fix_empty_fields("../la_giuseppe/train.conllu", "../la_giuseppe/train_fixed.conllu")
    fix_empty_fields("../la_giuseppe/dev.conllu", "../la_giuseppe/dev_fixed.conllu")
    fix_empty_fields("../la_giuseppe/test.conllu", "../la_giuseppe/test_fixed.conllu")