emanuelaboros committed on
Commit
ed9f086
·
verified ·
1 Parent(s): 7d319f7

Update generic_ner.py

Browse files
Files changed (1) hide show
  1. generic_ner.py +3 -24
generic_ner.py CHANGED
@@ -57,7 +57,7 @@ def find_entity_indices(article_text, search_text):
57
  original_end_index += 1 # Increment to include the last character
58
 
59
  # Append the found indices to the list
60
- if article_text[original_start_index] == ' ':
61
  original_start_index += 1
62
  indices.append((original_start_index, original_end_index))
63
 
@@ -67,27 +67,6 @@ def find_entity_indices(article_text, search_text):
67
  return indices
68
 
69
 
70
- # def find_entity_indices(article, entity):
71
- # """
72
- # Find all occurrences of an entity in the article and return their indices.
73
- #
74
- # :param article: The complete article text.
75
- # :param entity: The entity to search for.
76
- # :return: A list of tuples (lArticleOffset, rArticleOffset) for each occurrence.
77
- # """
78
- #
79
- # # normalized_target = normalize_text(entity)
80
- # # normalized_document = normalize_text(article)
81
- #
82
- # entity_indices = []
83
- # for match in re.finditer(re.escape(entity), article):
84
- # start_idx = match.start()
85
- # end_idx = match.end()
86
- # entity_indices.append((start_idx, end_idx))
87
- #
88
- # return entity_indices
89
-
90
-
91
  def get_entities(tokens, tags, confidences, text):
92
 
93
  tags = [tag.replace("S-", "B-").replace("E-", "I-") for tag in tags]
@@ -111,8 +90,8 @@ def get_entities(tokens, tags, confidences, text):
111
  entities.append(
112
  {
113
  "entity": original_label,
114
- "score": int(
115
- np.average(confidences[idx : idx + len(subtree)]) * 100
116
  ),
117
  "index": (idx, idx + len(subtree)),
118
  "word": original_string,
 
57
  original_end_index += 1 # Increment to include the last character
58
 
59
  # Append the found indices to the list
60
+ if article_text[original_start_index] == " ":
61
  original_start_index += 1
62
  indices.append((original_start_index, original_end_index))
63
 
 
67
  return indices
68
 
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def get_entities(tokens, tags, confidences, text):
71
 
72
  tags = [tag.replace("S-", "B-").replace("E-", "I-") for tag in tags]
 
90
  entities.append(
91
  {
92
  "entity": original_label,
93
+ "score": round(
94
+ np.average(confidences[idx : idx + len(subtree)]) * 100, 2
95
  ),
96
  "index": (idx, idx + len(subtree)),
97
  "word": original_string,