Spaces:
Build error
Build error
Commit ·
8b1561c
1
Parent(s): 8014fee
Remove plural names
Browse files- entity_extraction.py +10 -0
entity_extraction.py
CHANGED
|
@@ -3,6 +3,15 @@ import spacy
|
|
| 3 |
nlp = spacy.load("en_core_web_md")
|
| 4 |
nlp.add_pipe("entityfishing")
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def extract_entities(article):
|
| 8 |
'''Find wikidata refs for article entities'''
|
|
@@ -11,6 +20,7 @@ def extract_entities(article):
|
|
| 11 |
seen_surnames = []
|
| 12 |
seen_qids = []
|
| 13 |
|
|
|
|
| 14 |
doc = nlp(article)
|
| 15 |
for ent in doc.ents:
|
| 16 |
if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
|
|
|
|
| 3 |
nlp = spacy.load("en_core_web_md")
|
| 4 |
nlp.add_pipe("entityfishing")
|
| 5 |
|
| 6 |
+
def remove_plural_names(article):
    """Strip possessive markers (``'s`` / ``’s``) from the words of *article*.

    Despite the name, this does not touch plurals: it removes the
    possessive suffix so that, for example, ``"Obama's"`` becomes
    ``"Obama"``.

    The text is split on whitespace and re-joined with single spaces, so
    any run of whitespace (including newlines) collapses to one space.

    Args:
        article: The article text as a string.

    Returns:
        The article text with possessive suffixes removed.
    """
    # Local import: the rest of the file's import block is not visible here.
    import re

    # Only drop the marker when it follows a word character and ends a
    # word. A plain ``str.replace`` would also mangle words that merely
    # contain the sequence, e.g. a quoted "'so-called'" or "O'sullivan".
    possessive = re.compile(r"(?<=\w)['’]s(?!\w)")
    return " ".join(possessive.sub("", word) for word in article.split())
|
| 14 |
+
|
| 15 |
|
| 16 |
def extract_entities(article):
|
| 17 |
'''Find wikidata refs for article entities'''
|
|
|
|
| 20 |
seen_surnames = []
|
| 21 |
seen_qids = []
|
| 22 |
|
| 23 |
+
article = remove_plural_names(article)
|
| 24 |
doc = nlp(article)
|
| 25 |
for ent in doc.ents:
|
| 26 |
if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
|