Spaces:

student2222333051
/

project1

Sleeping

student2222333051 committed 29 days ago

Commit

67cfabf

verified ·

1 Parent(s): 901c892

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -46,7 +46,7 @@ def recognize_entities_auto(text, gold_entities=None):
         results = english_ner(text)
         for res in results:
             label = res['entity_group']
-            word = res['word']
             if label in ["PER", "ORG", "LOC", "GPE"]:
                 if label == "GPE":
                     label = "LOC"
@@ -58,7 +58,7 @@ def recognize_entities_auto(text, gold_entities=None):
         for span in doc.spans:
             label = span.type
             if label in ["PER", "ORG", "LOC"]:
-                entities[label].append(span.text)
     # Убираем дубликаты
     for key in entities:
@@ -78,8 +78,9 @@ def recognize_entities_auto(text, gold_entities=None):
     metrics_text = ""
     if gold_entities:
         for key in ['PER','ORG','LOC']:
-            pred_set = set(entities[key])
-            gold_set = set(gold_entities.get(key, []))
             tp = len(pred_set & gold_set)
             fp = len(pred_set - gold_set)

         results = english_ner(text)
         for res in results:
             label = res['entity_group']
+            word = res['word'].replace("##", "").strip()  # очистка токенов
             if label in ["PER", "ORG", "LOC", "GPE"]:
                 if label == "GPE":
                     label = "LOC"
         for span in doc.spans:
             label = span.type
             if label in ["PER", "ORG", "LOC"]:
+                entities[label].append(span.text.strip())
     # Убираем дубликаты
     for key in entities:
     metrics_text = ""
     if gold_entities:
         for key in ['PER','ORG','LOC']:
+            # Приводим к нижнему регистру для корректного сравнения
+            pred_set = set([p.lower().strip() for p in entities[key]])
+            gold_set = set([g.lower().strip() for g in gold_entities.get(key, [])])
             tp = len(pred_set & gold_set)
             fp = len(pred_set - gold_set)