Spaces:
Build error
Commit · a50857e
1
Parent(s): e568ca3
Filtering
Browse files
app.py
CHANGED
|
@@ -17,6 +17,7 @@ article = st.text_area('Article to analyze:', value=open("example.txt").read())
|
|
| 17 |
|
| 18 |
seen_entities = []
|
| 19 |
seen_surnames = []
|
|
|
|
| 20 |
if st.button('Submit'):
|
| 21 |
good_ents = []
|
| 22 |
|
|
@@ -25,18 +26,24 @@ if st.button('Submit'):
|
|
| 25 |
for ent in doc.ents:
|
| 26 |
if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
|
| 27 |
continue
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
if ent.
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
seen_surnames.append(ent.text.split()[-1])
|
| 37 |
|
| 38 |
seen_entities.append(ent.text)
|
| 39 |
print((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
r = requests.get("https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json&property=P18&entity=" + ent._.kb_qid)
|
| 41 |
data = r.json()["claims"]
|
| 42 |
if "P18" in data.keys():
|
|
|
|
| 17 |
|
| 18 |
seen_entities = []
|
| 19 |
seen_surnames = []
|
| 20 |
+
seen_qids = []
|
| 21 |
if st.button('Submit'):
|
| 22 |
good_ents = []
|
| 23 |
|
|
|
|
| 26 |
for ent in doc.ents:
|
| 27 |
if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
|
| 28 |
continue
|
| 29 |
+
if ent._.nerd_score < 0.5:
|
| 30 |
+
continue
|
| 31 |
|
| 32 |
+
if len(ent.text.split()) == 1:
|
| 33 |
+
# Single name
|
| 34 |
+
if ent.text in seen_surnames:
|
| 35 |
+
continue
|
| 36 |
+
elif ent.label_ == "PERSON":
|
| 37 |
+
# Multipart name
|
| 38 |
+
seen_surnames.append(ent.text.split()[-1])
|
|
|
|
| 39 |
|
| 40 |
seen_entities.append(ent.text)
|
| 41 |
print((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score))
|
| 42 |
+
|
| 43 |
+
if ent._.kb_qid in seen_qids:
|
| 44 |
+
continue
|
| 45 |
+
seen_qids.append(ent._.kb_qid)
|
| 46 |
+
|
| 47 |
r = requests.get("https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json&property=P18&entity=" + ent._.kb_qid)
|
| 48 |
data = r.json()["claims"]
|
| 49 |
if "P18" in data.keys():
|