student2222333051 committed on
Commit
67cfabf
·
verified ·
1 Parent(s): 901c892

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -46,7 +46,7 @@ def recognize_entities_auto(text, gold_entities=None):
46
  results = english_ner(text)
47
  for res in results:
48
  label = res['entity_group']
49
- word = res['word']
50
  if label in ["PER", "ORG", "LOC", "GPE"]:
51
  if label == "GPE":
52
  label = "LOC"
@@ -58,7 +58,7 @@ def recognize_entities_auto(text, gold_entities=None):
58
  for span in doc.spans:
59
  label = span.type
60
  if label in ["PER", "ORG", "LOC"]:
61
- entities[label].append(span.text)
62
 
63
  # Убираем дубликаты
64
  for key in entities:
@@ -78,8 +78,9 @@ def recognize_entities_auto(text, gold_entities=None):
78
  metrics_text = ""
79
  if gold_entities:
80
  for key in ['PER','ORG','LOC']:
81
- pred_set = set(entities[key])
82
- gold_set = set(gold_entities.get(key, []))
 
83
 
84
  tp = len(pred_set & gold_set)
85
  fp = len(pred_set - gold_set)
 
46
  results = english_ner(text)
47
  for res in results:
48
  label = res['entity_group']
49
+ word = res['word'].replace("##", "").strip() # очистка токенов
50
  if label in ["PER", "ORG", "LOC", "GPE"]:
51
  if label == "GPE":
52
  label = "LOC"
 
58
  for span in doc.spans:
59
  label = span.type
60
  if label in ["PER", "ORG", "LOC"]:
61
+ entities[label].append(span.text.strip())
62
 
63
  # Убираем дубликаты
64
  for key in entities:
 
78
  metrics_text = ""
79
  if gold_entities:
80
  for key in ['PER','ORG','LOC']:
81
+ # Приводим к нижнему регистру для корректного сравнения
82
+ pred_set = set([p.lower().strip() for p in entities[key]])
83
+ gold_set = set([g.lower().strip() for g in gold_entities.get(key, [])])
84
 
85
  tp = len(pred_set & gold_set)
86
  fp = len(pred_set - gold_set)