File size: 1,592 Bytes
8d3380d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gc
from NER_Wrapper.NameExtractors import NERWrapper,NextPassNERWrapper

class FullNERPipeline:
    """Two-pass named-entity-recognition pipeline.

    Text is first evaluated by ``NERWrapper`` and the result is then handed to
    ``NextPassNERWrapper`` for a second pass.
    """

    # Default artefact locations, used when the caller does not override them.
    DEFAULT_MODEL_PATH = 'models/NER_Models/torch_model.pth'
    DEFAULT_IDX2TAG_PATH = 'models/NER_Models/idx2tag.json'

    def __init__(self, model_path=DEFAULT_MODEL_PATH, idx2tag_path=DEFAULT_IDX2TAG_PATH):
        """
        Build both NER stages.

        Parameters:
        model_path (str): Path passed to ``NERWrapper`` as the pre-trained
            model. Defaults to ``DEFAULT_MODEL_PATH``.
        idx2tag_path (str): Path passed to ``NERWrapper`` as the index-to-tag
            mapping file. Defaults to ``DEFAULT_IDX2TAG_PATH``.
        """
        # First pass: wrapper built from the given model and idx2tag paths.
        self.ner_wrapper = NERWrapper(model_path, idx2tag_path)

        # Second pass: constructed without arguments, so it locates its own
        # resources internally.
        self.next_ner_wrapper = NextPassNERWrapper()

    def process_text(self, text: str) -> str:
        """
        Run the input text through both NER passes and return one string.

        The text is split on ``'.'`` into sentence fragments; fragments that
        are empty or whitespace-only (e.g. the one after a trailing period)
        are dropped before the first pass sees them.

        Parameters:
        text (str): The input text to be processed for named entity recognition.

        Returns:
        str: The second-pass output joined with single spaces, or an empty
            string when the text contains no non-blank fragment (in which case
            neither model is invoked).
        """
        # NOTE: splitting on '.' is a deliberately simple sentence splitter; it
        # also cuts at abbreviations and decimal points. Fragments keep their
        # original surrounding whitespace; only blank ones are discarded.
        sentences = [fragment for fragment in text.split('.') if fragment.strip()]
        if not sentences:
            return ""

        # First pass: evaluate the sentence fragments with the initial NER model.
        evaluated_text = self.ner_wrapper.evaluate_text(sentences)

        # Second pass: refine the first-pass output.
        # Assumes process_sentences returns an iterable of str (required by the
        # join below) - TODO confirm against NextPassNERWrapper.
        ner_text = self.next_ner_wrapper.process_sentences(evaluated_text)

        # Force a collection so intermediates from the two passes are released
        # promptly.
        gc.collect()

        return " ".join(ner_text)