Spaces:
Build error
Build error
File size: 1,570 Bytes
c879739 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import re
from enum import Enum
from typing import Dict, Iterable, Optional, Tuple
class Entities(Enum):
    """Entity labels that can be attached to spans of a transaction message.

    A member's ``value`` is the label string that
    ``StringUtils.get_spacy_dataset`` writes into each generated annotation.
    """

    PAYER = "PAYER"
    # NOTE(review): the value is "PAYER_ACCOUNT", not the member name;
    # confirm the shorter label is intentional before relying on name == value.
    PAYER_BANK_ACCOUNT = "PAYER_ACCOUNT"
    VPA = "VPA"
    MESSAGE = "MESSAGE"
    IFSCCODE = "IFSCCODE"
    UTR = "UTR"
    TXNMETHOD = "TXNMETHOD"
    BANK = "BANK"
class StringUtils:
    """String helpers for building spaCy-style entity annotations.

    All helpers are classmethods; the class holds no state.
    """

    @classmethod
    def replace_multiple_spaces_with_single_space(cls, text: str) -> str:
        """Collapse each whitespace run in *text* to one space and trim the ends."""
        return re.sub(r'\s+', ' ', text).strip()

    @classmethod
    def find_word_indices(cls, text: str, word: str) -> Optional[Tuple[int, int]]:
        """Locate the first occurrence of *word* inside *text*.

        *word* is whitespace-normalised before the search; *text* is searched
        exactly as given, so callers should pass text that has been normalised
        the same way.

        Returns:
            ``(start, end)`` where *end* is the index of the last matched
            character (inclusive), or ``None`` when *word* is empty after
            normalisation or does not occur in *text*.
        """
        needle = cls.replace_multiple_spaces_with_single_space(word)
        if not needle:
            # str.find("") reports index 0, which would otherwise produce the
            # meaningless span (0, -1) for an empty or whitespace-only word.
            return None
        start_index = text.find(needle)
        if start_index == -1:
            return None  # Word not found
        return start_index, start_index + len(needle) - 1

    @classmethod
    def get_spacy_ref_for_word(cls, text: str, word: str, type: str) -> list:
        """Build one ``[start, end, label]`` annotation for *word* in *text*.

        The ``type`` parameter name shadows the builtin but is kept because
        callers pass it by keyword.

        Returns:
            ``[start, end, type]`` where *end* is exclusive (one past the last
            matched character).

        Raises:
            ValueError: if *word* cannot be located in *text*.
        """
        indices = cls.find_word_indices(text, word)
        if indices is None:
            # Fail with a message naming the entity instead of an opaque
            # "cannot unpack non-iterable NoneType" TypeError.
            raise ValueError(
                "Entity {!r} not found in text {!r}".format(word, text)
            )
        start_index, end_index = indices
        # find_word_indices returns an inclusive end; the annotation uses an
        # exclusive one.
        return [start_index, end_index + 1, type]

    @classmethod
    def get_spacy_dataset(
        cls,
        transaction: str,
        entities_name_to_type_map: "Dict[str, Entities]",
        entity_types: "Optional[Iterable[Entities]]" = None,
    ) -> list:
        """Build the annotation list for one transaction message.

        Args:
            transaction: The transaction text the entities are searched in.
            entities_name_to_type_map: Maps each entity's literal text to its
                entity type; the type's ``value`` becomes the label.
            entity_types: Entity types to include. Defaults to
                ``Entities.PAYER`` and ``Entities.UTR``; entries of any other
                type are skipped.

        Returns:
            A list of ``[start, end, label]`` annotations, in the iteration
            order of *entities_name_to_type_map*.

        Raises:
            ValueError: if a selected entity's text is not found in
                *transaction*.
        """
        if entity_types is None:
            wanted = (Entities.PAYER, Entities.UTR)
        else:
            # Materialise once so one-shot iterables survive repeated lookups.
            wanted = tuple(entity_types)
        return [
            cls.get_spacy_ref_for_word(
                text=transaction,
                word=entity_value,
                type=entity_type.value,
            )
            for entity_value, entity_type in entities_name_to_type_map.items()
            if entity_type in wanted
        ]
|