File size: 1,570 Bytes
c879739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import re
from enum import Enum
from typing import Dict, Iterable, List, Optional, Tuple


class Entities(Enum):
    """Entity types that can be assigned to pieces of a transaction string.

    Each member's value is a plain string; ``StringUtils.get_spacy_dataset``
    uses a member's ``value`` as the label of every span it emits.
    """

    PAYER = "PAYER"
    # NOTE(review): the member name and its value differ here
    # (PAYER_BANK_ACCOUNT vs "PAYER_ACCOUNT") -- confirm this is intentional
    # before relying on name/value round-tripping.
    PAYER_BANK_ACCOUNT = "PAYER_ACCOUNT"
    VPA = "VPA"
    MESSAGE = "MESSAGE"
    IFSCCODE = "IFSCCODE"
    UTR = "UTR"
    TXNMETHOD = "TXNMETHOD"
    BANK = "BANK"


class StringUtils:
    """String helpers for building ``[start, end, label]`` entity spans.

    All helpers are classmethods; the class holds no state.
    """

    @classmethod
    def replace_multiple_spaces_with_single_space(cls, text):
        """Collapse each run of whitespace in *text* into a single space.

        Any whitespace character matched by the regex ``\\s`` (spaces, tabs,
        newlines, ...) counts, and leading/trailing whitespace is removed.

        Args:
            text: The string to normalise.

        Returns:
            The normalised string.
        """
        return re.sub(r'\s+', ' ', text).strip()

    @classmethod
    def find_word_indices(cls, text, word) -> Optional[Tuple[int, int]]:
        """Locate the first occurrence of *word* inside *text*.

        Only *word* is whitespace-normalised before searching; *text* is
        searched exactly as given, so a multi-word *word* will only match a
        *text* whose internal whitespace is already single spaces.

        Args:
            text: The string to search in.
            word: The substring to look for.

        Returns:
            ``(start, end)`` with *end* inclusive (index of the last matched
            character), or ``None`` when *word* does not occur in *text*.
            NOTE: a *word* that is empty after normalisation matches at
            index 0 and therefore yields ``(0, -1)``.
        """
        word = cls.replace_multiple_spaces_with_single_space(word)

        start_index = text.find(word)
        if start_index == -1:
            return None  # Word not found

        return start_index, start_index + len(word) - 1

    @classmethod
    def get_spacy_ref_for_word(cls, text, word, type) -> list:
        """Build one ``[start, end, label]`` span for *word* within *text*.

        The parameter name ``type`` shadows the builtin but is kept because
        callers pass it by keyword.

        Args:
            text: The string that contains the entity.
            word: The entity text to locate (first occurrence is used).
            type: The label to attach to the span.

        Returns:
            ``[start, end, type]`` where *end* is exclusive, i.e.
            ``text[start:end]`` is the matched text.

        Raises:
            ValueError: If *word* does not occur in *text*. Previously this
                surfaced as an opaque ``TypeError`` from unpacking ``None``.
        """
        indices = cls.find_word_indices(text, word)
        if indices is None:
            raise ValueError(
                f"entity text {word!r} does not occur in the given text"
            )

        start_index, end_index = indices
        # find_word_indices returns an inclusive end; the span uses an
        # exclusive one.
        return [start_index, end_index + 1, type]

    @classmethod
    def get_spacy_dataset(
        cls,
        transaction: str,
        entities_name_to_type_map: Dict[str, Entities],
        *,
        entity_types: Optional[Iterable[Entities]] = None,
    ) -> List[list]:
        """Build the entity spans for one transaction string.

        Args:
            transaction: The transaction text the entities were taken from.
            entities_name_to_type_map: Maps each entity's text to its type.
            entity_types: Entity types to emit spans for. Defaults to
                ``Entities.PAYER`` and ``Entities.UTR``; entries of any
                other type are ignored.

        Returns:
            One ``[start, end, label]`` list per selected entity, in the
            iteration order of *entities_name_to_type_map*. ``label`` is the
            entity type's ``value``.

        Raises:
            ValueError: If a selected entity's text does not occur in
                *transaction*.
        """
        if entity_types is None:
            selected = (Entities.PAYER, Entities.UTR)
        else:
            # Materialise once so one-shot iterables survive repeated
            # membership tests.
            selected = tuple(entity_types)

        return [
            cls.get_spacy_ref_for_word(
                text=transaction,
                word=entity_value,
                type=entity_type.value,
            )
            for entity_value, entity_type in entities_name_to_type_map.items()
            if entity_type in selected
        ]