Spaces:
Build error
Build error
| import re | |
| from typing import Dict | |
| from enum import Enum | |
class Entities(Enum):
    """Closed set of entity labels; each member's value is its string label."""

    PAYER = "PAYER"
    # Note: the label string differs from the member name here.
    PAYER_BANK_ACCOUNT = "PAYER_ACCOUNT"
    VPA = "VPA"
    MESSAGE = "MESSAGE"
    IFSCCODE = "IFSCCODE"
    UTR = "UTR"
    TXNMETHOD = "TXNMETHOD"
    BANK = "BANK"
class StringUtils:
    """String helpers for locating entity text and building span records.

    All methods are class methods, so they can be called directly on the
    class (``StringUtils.find_word_indices(text, word)``) or on an instance.
    """

    @classmethod
    def replace_multiple_spaces_with_single_space(cls, text):
        """Collapse each whitespace run in *text* to one space and strip the ends."""
        return re.sub(r'\s+', ' ', text).strip()

    @classmethod
    def find_word_indices(cls, text, word):
        """Locate the first occurrence of *word* in *text*.

        *word* is whitespace-normalized before searching; *text* is searched
        as given.

        Returns:
            ``(start_index, end_index)`` with ``end_index`` inclusive, or
            ``None`` when the normalized word does not occur in *text*.
        """
        word = cls.replace_multiple_spaces_with_single_space(word)
        start_index = text.find(word)
        if start_index == -1:
            return None  # Word not found
        end_index = start_index + len(word) - 1
        return start_index, end_index

    @classmethod
    def get_spacy_ref_for_word(cls, text, word, type):
        """Build a ``[start, end, type]`` span record for *word* inside *text*.

        ``end`` is exclusive (one past the last matched character). The
        parameter name ``type`` shadows the builtin but is kept because
        callers pass it by keyword.

        Raises:
            ValueError: if *word* cannot be found in *text*.
        """
        indices = cls.find_word_indices(text, word)
        if indices is None:
            # Fail with a clear message instead of an opaque unpacking error.
            raise ValueError(f"{word!r} not found in text {text!r}")
        start_index, end_index = indices
        return [start_index, end_index + 1, type]

    @classmethod
    def get_spacy_dataset(cls, transaction: str, entities_name_to_type_map: Dict[str, "Entities"]):
        """Return span records for the PAYER and UTR entities of *transaction*.

        Args:
            transaction: The text in which entity values are located.
            entities_name_to_type_map: Maps each entity's text to its
                ``Entities`` member. Entries of any type other than
                ``Entities.PAYER`` or ``Entities.UTR`` are ignored.

        Returns:
            A list of ``[start, end, label]`` records in mapping order, where
            ``label`` is the ``Entities`` member's string value.

        Raises:
            ValueError: if a PAYER/UTR entity's text is absent from *transaction*.
        """
        return [
            cls.get_spacy_ref_for_word(
                text=transaction,
                word=entity_value,
                type=entity_type.value,
            )
            for entity_value, entity_type in entities_name_to_type_map.items()
            if entity_type in (Entities.PAYER, Entities.UTR)
        ]