Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -46,7 +46,7 @@ def recognize_entities_auto(text, gold_entities=None):
|
|
| 46 |
results = english_ner(text)
|
| 47 |
for res in results:
|
| 48 |
label = res['entity_group']
|
| 49 |
-
word = res['word']
|
| 50 |
if label in ["PER", "ORG", "LOC", "GPE"]:
|
| 51 |
if label == "GPE":
|
| 52 |
label = "LOC"
|
|
@@ -58,7 +58,7 @@ def recognize_entities_auto(text, gold_entities=None):
|
|
| 58 |
for span in doc.spans:
|
| 59 |
label = span.type
|
| 60 |
if label in ["PER", "ORG", "LOC"]:
|
| 61 |
-
entities[label].append(span.text)
|
| 62 |
|
| 63 |
# Убираем дубликаты
|
| 64 |
for key in entities:
|
|
@@ -78,8 +78,9 @@ def recognize_entities_auto(text, gold_entities=None):
|
|
| 78 |
metrics_text = ""
|
| 79 |
if gold_entities:
|
| 80 |
for key in ['PER','ORG','LOC']:
|
| 81 |
-
|
| 82 |
-
|
|
|
|
| 83 |
|
| 84 |
tp = len(pred_set & gold_set)
|
| 85 |
fp = len(pred_set - gold_set)
|
|
|
|
| 46 |
results = english_ner(text)
|
| 47 |
for res in results:
|
| 48 |
label = res['entity_group']
|
| 49 |
+
word = res['word'].replace("##", "").strip() # очистка токенов
|
| 50 |
if label in ["PER", "ORG", "LOC", "GPE"]:
|
| 51 |
if label == "GPE":
|
| 52 |
label = "LOC"
|
|
|
|
| 58 |
for span in doc.spans:
|
| 59 |
label = span.type
|
| 60 |
if label in ["PER", "ORG", "LOC"]:
|
| 61 |
+
entities[label].append(span.text.strip())
|
| 62 |
|
| 63 |
# Убираем дубликаты
|
| 64 |
for key in entities:
|
|
|
|
| 78 |
metrics_text = ""
|
| 79 |
if gold_entities:
|
| 80 |
for key in ['PER','ORG','LOC']:
|
| 81 |
+
# Приводим к нижнему регистру для корректного сравнения
|
| 82 |
+
pred_set = set([p.lower().strip() for p in entities[key]])
|
| 83 |
+
gold_set = set([g.lower().strip() for g in gold_entities.get(key, [])])
|
| 84 |
|
| 85 |
tp = len(pred_set & gold_set)
|
| 86 |
fp = len(pred_set - gold_set)
|