Spaces:
Runtime error
Runtime error
| import spacy | |
| from negspacy.negation import Negex | |
| from spacy.matcher import PhraseMatcher | |
| from spacy.tokens import Span | |
def negation(model: "spacy.language.Language", entities: list):
    """
    Take in the current model pipeline and add in the Negation model.

    The 'parser' component is removed when present, a 'sentencizer' is added
    when missing, and 'negex' is added (configured with `entities`) unless
    the pipeline already has it. The pipeline is modified in place.

    Parameters:
        model: spacy model (pipeline) to modify
        entities: entity labels the negation model should consider.
            A list/tuple of labels is wrapped as {"ent_types": [...]},
            a single label string is treated as a one-element list,
            an already-built config dict is passed through unchanged,
            and None results in an empty config.
    Returns:
        model: the same spacy model with Negation added to the pipeline
    """
    if 'parser' in model.pipe_names:
        model.remove_pipe('parser')

    # NOTE(review): presumably the sentencizer replaces the sentence
    # boundaries the removed parser would have provided - confirm.
    if 'sentencizer' not in model.pipe_names:
        model.add_pipe('sentencizer')

    if 'negex' not in model.pipe_names:
        # add_pipe only accepts a dict for `config`; a bare list of labels
        # has to be placed under negspacy's "ent_types" key.
        if entities is None:
            negex_config = {}
        elif isinstance(entities, dict):
            negex_config = entities
        elif isinstance(entities, str):
            negex_config = {'ent_types': [entities]}
        else:
            negex_config = {'ent_types': list(entities)}
        model.add_pipe('negex', config=negex_config)

    return model
def infer_negation(
    neg_model: "spacy.language.Language",
    model: "spacy.language.Language",
    text: str,
    pred_doc: "spacy.tokens.Doc",
):
    """
    To match results from the negation model with the results from the model.
    Replace the entity type of the spans or tokens in the predictions doc
    that should be negated with entity type "NEG".

    A span is relabelled only when a phrase match in `pred_doc` has exactly
    the same token start AND end as a negated entity found by `neg_model`.
    Existing entities in `pred_doc` that overlap such a span are dropped.

    NOTE(review): token offsets from `neg_model(text)` are compared directly
    with offsets in `pred_doc`, which assumes both pipelines tokenize `text`
    identically - confirm against the callers.

    Parameters:
        neg_model: spacy negation model
        model: spacy model
        text: text sample
        pred_doc: prediction of the text sample from model
    Returns:
        pred_doc: spacy doc with all entities that should be negated replaced with the "NEG" entity type
    """
    neg_doc = neg_model(text)

    results = {'ent': [], 'start': [], 'end': []}
    for ent in neg_doc.ents:
        if ent._.negex:
            results['ent'].append(ent.text)
            results['start'].append(ent.start)
            results['end'].append(ent.end)
    print('Negation: ', results)

    # Nothing negated: leave the predictions untouched and avoid registering
    # an empty pattern list with the matcher.
    if not results['ent']:
        return pred_doc

    # One pattern per distinct negated text (dict.fromkeys keeps order).
    patterns = [model.make_doc(ent_text) for ent_text in dict.fromkeys(results['ent'])]
    matcher = PhraseMatcher(model.vocab)
    matcher.add('NEG', patterns)

    # to get exact matches: not only the span or word matches but also location
    pending = set(zip(results['start'], results['end']))
    kept_entities = list(pred_doc.ents)
    negated_entities = []
    for match_id, start, end in matcher(pred_doc):
        if (start, end) not in pending:
            continue
        # Each negated location is used once, so the new spans can never
        # duplicate or overlap each other (doc.ents rejects overlaps).
        pending.discard((start, end))
        negated_entities.append(Span(pred_doc, start, end, label=match_id))
        # Drop any predicted entity that overlaps the negated span.
        kept_entities = [
            e for e in kept_entities if e.end <= start or e.start >= end
        ]

    pred_doc.ents = tuple(kept_entities) + tuple(negated_entities)
    return pred_doc