def bio_2_json_one(anno_txt):
    """Convert one BIO-tagged sentence into its text plus entity spans.

    Every non-empty line of ``anno_txt`` must hold exactly two tab-separated
    fields: a token (typically a single character) and its label. A label
    starting with ``B-`` opens a new entity, ``O`` closes the open entity,
    and any other label (e.g. ``I-<type>``) extends the open entity, if any.

    Args:
        anno_txt: The tagged sentence, one ``token<TAB>label`` pair per line.
            Empty lines (for example one caused by a trailing newline) are
            skipped, and a trailing carriage return on a line is ignored.

    Returns:
        A dict ``{'text': str, 'anno': list}``. ``text`` is the concatenation
        of all tokens. Each item of ``anno`` is a list
        ``[start, end, entity_text, entity_type]`` where ``start``/``end`` are
        character offsets into ``text`` (end exclusive), so
        ``text[start:end] == entity_text``.

    Raises:
        ValueError: If a non-empty line does not contain exactly two
            tab-separated fields.
    """
    pieces = []          # all tokens in order; joined once at the end
    anno = []
    entity_tokens = []   # tokens of the entity that is currently open
    offset = 0           # character offset of the next token within the text
    start = 0            # character offset where the open entity begins
    now_label = ''       # type of the open entity; '' when none is open

    for line_no, raw_line in enumerate(anno_txt.split('\n'), 1):
        line = raw_line.rstrip('\r')
        if not line:
            continue

        fields = line.split('\t')
        if len(fields) != 2:
            raise ValueError(
                'line %d: expected "token<TAB>label", got %r' % (line_no, raw_line)
            )
        char, label = fields

        starts_entity = label.startswith('B-')

        # Both 'O' and a new 'B-' terminate the entity that is open, so two
        # adjacent entities without an 'O' in between are both recorded.
        if now_label and (starts_entity or label == 'O'):
            anno.append([start, offset, ''.join(entity_tokens), now_label])
            now_label = ''

        if starts_entity:
            start = offset
            # Keep everything after the 'B-' prefix so that hyphenated
            # entity types are not truncated.
            now_label = label[2:]
            entity_tokens = []

        if now_label:
            entity_tokens.append(char)

        pieces.append(char)
        offset += len(char)

    # An entity that runs to the end of the sentence is still open here.
    if now_label:
        anno.append([start, offset, ''.join(entity_tokens), now_label])

    return {'text': ''.join(pieces), 'anno': anno}
|
|
|
|
|
def bit_2_json(txt):
    """Convert a BIO-tagged corpus into a list of per-sentence annotations.

    Sentences in ``txt`` are separated by blank lines. Each sentence is
    passed to ``bio_2_json_one`` and the results are collected in order.

    Args:
        txt: The tagged corpus; sentences are separated by one or more blank
            lines. Windows line endings (CRLF) are accepted.

    Returns:
        A list with one ``bio_2_json_one`` result per non-empty sentence.
    """
    # Normalise CRLF first, otherwise '\r\n\r\n' never matches the separator.
    normalized = txt.replace('\r\n', '\n')

    annos = []
    for anno_txt in normalized.split('\n\n'):
        # Runs of three or more newlines, or a leading/trailing newline,
        # leave stray newlines on a chunk; drop them so the per-sentence
        # parser never receives an empty first or last line.
        anno_txt = anno_txt.strip('\n')
        if not anno_txt:
            continue
        annos.append(bio_2_json_one(anno_txt))
    return annos
|
|
|
|
|
if __name__ == '__main__':
    # Small demo: one sentence tagged character by character, with an entity
    # at the start, one in the middle and one running to the end of the text.
    sample = '''你\tB-PER
是\tO
一\tO
个\tO
聪\tB-PER
明\tI-PER
的\tO
软\tB-ORG
件\tI-ORG
工\tI-ORG
程\tI-ORG
师\tI-ORG'''

    print(bit_2_json(sample))
|
|
|