|
|
class Entity(object):
    """Plain record describing one entity and where it sits in a text.

    The constructor stores every argument unchanged on the instance under
    the same name without the leading underscore (``_id`` -> ``id``, ...).
    No validation or conversion is performed.

    NOTE(review): ``start_idx`` / ``end_idx`` look like offsets into the
    source text and ``mask`` / ``image`` like visual data attached to the
    entity -- confirm against the callers.
    """

    def __init__(self, _id, _text, _mask, _interactive, _type,
                 _start_idx, _end_idx, _image=None):
        # Identity and textual content.
        self.id, self.text = _id, _text
        # Associated mask and whether the entity is flagged as interactive.
        self.mask, self.interactive = _mask, _interactive
        self.type = _type
        # Span boundaries supplied by the caller.
        self.start_idx, self.end_idx = _start_idx, _end_idx
        # Optional payload; stays None when the caller provides nothing.
        self.image = _image
|
|
def split_by_ordered_substrings(sentence, substrings):
    """Split ``sentence`` into pieces around ``substrings``, matched in order.

    The substrings are searched for one after another, each search starting
    where the previous match ended. The text between matches (and any
    leading/trailing text) is kept as separate pieces, so concatenating the
    returned pieces reproduces ``sentence``.

    Args:
        sentence: The string to split.
        substrings: Sequence of strings expected to occur in ``sentence``
            in the given order.

    Returns:
        A ``(results, substring_indices)`` tuple of two equal-length lists.
        ``results`` holds the pieces in order of appearance.
        ``substring_indices[k]`` is the position in ``substrings`` of the
        substring that ``results[k]`` matched, or ``None`` when
        ``results[k]`` is filler text between matches.

    A substring that cannot be found at or after the current position is
    skipped silently; it contributes nothing to either list and does not
    move the search position.
    """
    results = []
    substring_indices = []

    cursor = 0  # absolute offset in `sentence` where the next search starts
    for i, substring in enumerate(substrings):
        # Search in place from `cursor` instead of slicing the tail of the
        # sentence first, which would copy the remainder on every iteration.
        found_at = sentence.find(substring, cursor)
        if found_at == -1:
            continue

        # Text between the previous match and this one is an unmatched piece.
        if found_at > cursor:
            results.append(sentence[cursor:found_at])
            substring_indices.append(None)

        results.append(substring)
        substring_indices.append(i)
        cursor = found_at + len(substring)

    # Whatever follows the last match is a trailing unmatched piece.
    if cursor < len(sentence):
        results.append(sentence[cursor:])
        substring_indices.append(None)

    return results, substring_indices
|
|