Spaces:
Runtime error
Runtime error
| from flair.data import Sentence | |
| from flair.models import SequenceTagger | |
| from NERDA.models import NERDA | |
| from hazm import word_tokenize | |
| import flair | |
| import utils | |
class KPE:
    """Keyphrase extractor that merges a NERDA keyword tagger with flair NER.

    The NERDA model tags tokens with ``B-KEYWORD`` / ``I-KEYWORD`` / ``O``;
    optionally, a flair ``SequenceTagger`` supplies named entities which are
    merged into the keyword list.
    """

    def __init__(self, trained_kpe_model, flair_ner_model, device='cpu') -> None:
        """Build both models and load their weights.

        Args:
            trained_kpe_model: Passed to ``NERDA.load_network_from_file`` to
                restore the trained keyword-tagging network.
            flair_ner_model: Passed to ``SequenceTagger.load`` to obtain the
                NER tagger.
            device: Forwarded to ``NERDA`` and assigned to ``flair.device``.
        """
        self.extractor_model = NERDA(
            tag_scheme=['B-KEYWORD', 'I-KEYWORD'],
            tag_outside='O',
            transformer='xlm-roberta-large',
            max_len=512,
            device=device)
        # flair reads its target device from this module-level attribute.
        flair.device = device
        self.extractor_model.load_network_from_file(trained_kpe_model)
        self.ner_tagger = SequenceTagger.load(flair_ner_model)
        # Entity tags that are dropped from the NER output in ``extract``.
        self.IGNORE_TAGS = {'ORDINAL', 'DATE', 'CARDINAL'}

    @staticmethod
    def combine_keywords_nes(init_keywords, nes):
        """Merge extracted keywords with named entities.

        For every keyword, ``utils.fuzzy_subword_match`` is asked for the
        index of a matching entity in ``nes`` (``-1`` is treated as "no
        match"). A matched keyword is replaced by that entity and the entity
        is consumed; an unmatched keyword is kept as is. Entities left over
        at the end are appended unless already present in the result.

        This is a static method: it uses no instance state, and it must be
        callable both as ``self.combine_keywords_nes(...)`` (as ``extract``
        does) and as ``KPE.combine_keywords_nes(...)``.

        Args:
            init_keywords: Keywords produced by the keyword tagger.
            nes: Named-entity strings; the caller's list is not modified.

        Returns:
            A new list with the combined keywords.
        """
        # De-duplicate while keeping first-occurrence order so the result is
        # deterministic (a plain ``set`` would order entities by hash seed).
        nes = list(dict.fromkeys(nes))
        print('nes before combined ', nes)
        combined_keywords = []
        for kw in init_keywords:
            matched_index = utils.fuzzy_subword_match(kw, nes)
            if matched_index != -1:
                print(kw, nes[matched_index])
                combined_keywords.append(nes[matched_index])
                # Consume the entity so it cannot be matched a second time.
                del nes[matched_index]
            else:
                combined_keywords.append(kw)
        print('nes after combined ', nes)
        combined_keywords.extend([n for n in nes if n not in combined_keywords])
        return combined_keywords

    def extract(self, txt, using_ner=True):
        """Extract an ordered, duplicate-free list of keywords from ``txt``.

        Args:
            txt: Input text.
            using_ner: When true, run the flair NER tagger and merge its
                entities (except those tagged with ``IGNORE_TAGS``) into the
                keyword list; when false, only the keyword tagger is used.

        Returns:
            Keywords in first-occurrence order without duplicates.
        """
        sentence = Sentence(txt)

        # Predict NER tags.
        if using_ner:
            self.ner_tagger.predict(sentence)
            nes = [entity.text for entity in sentence.get_spans('ner') if entity.tag not in self.IGNORE_TAGS]
        else:
            nes = []

        # Remove punctuation.
        nes = list(map(utils.remove_puncs, nes))
        print('nes ', nes)

        # The whole text is treated as one sentence and split on whitespace.
        sentences, tags_conf = self.extractor_model.predict_text(txt, sent_tokenize=lambda txt: [txt], word_tokenize=lambda txt: txt.split(), return_confidence=True)
        init_keywords = utils.get_ne_from_iob_output(sentences, tags_conf)
        init_keywords = list(map(utils.remove_puncs, init_keywords))
        print('init keywords : ', init_keywords)

        # Combine the NER response with the initial keywords.
        merged_keywords = self.combine_keywords_nes(init_keywords, nes)

        # De-duplicate while keeping order.
        final_keywords = []
        for kw in merged_keywords:
            if kw not in final_keywords:
                final_keywords.append(kw)
        return final_keywords