Update generic_ner.py
Browse files- generic_ner.py +9 -4
generic_ner.py
CHANGED
|
@@ -9,6 +9,9 @@ import torch.nn.functional as F
|
|
| 9 |
import re
|
| 10 |
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
def tokenize(text):
|
| 13 |
# print(text)
|
| 14 |
for punctuation in string.punctuation:
|
|
@@ -109,14 +112,18 @@ def get_entities(tokens, tags, confidences, text):
|
|
| 109 |
"score": np.average(confidences[idx : idx + len(subtree)]),
|
| 110 |
"index": (idx, idx + len(subtree)),
|
| 111 |
"word": original_string,
|
| 112 |
-
"start": entity_start_position,
|
| 113 |
"end": entity_end_position,
|
| 114 |
-
"text": text,
|
| 115 |
}
|
| 116 |
)
|
| 117 |
# assert (
|
| 118 |
# text[entity_start_position:entity_end_position] == original_string
|
| 119 |
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
idx += len(subtree)
|
| 121 |
|
| 122 |
# Update the current character position
|
|
@@ -148,8 +155,6 @@ def realign(
|
|
| 148 |
return words_list, preds_list, confidence_list
|
| 149 |
|
| 150 |
|
| 151 |
-
import re, string
|
| 152 |
-
|
| 153 |
# List of additional "strange" punctuation marks
|
| 154 |
additional_punctuation = "‘’“”„«»•–—―‣◦…§¶†‡‰′″〈〉"
|
| 155 |
|
|
|
|
| 9 |
import re
|
| 10 |
|
| 11 |
|
| 12 |
+
import re, string
|
| 13 |
+
|
| 14 |
+
|
| 15 |
def tokenize(text):
|
| 16 |
# print(text)
|
| 17 |
for punctuation in string.punctuation:
|
|
|
|
| 112 |
"score": np.average(confidences[idx : idx + len(subtree)]),
|
| 113 |
"index": (idx, idx + len(subtree)),
|
| 114 |
"word": original_string,
|
| 115 |
+
"start": entity_start_position-1,
|
| 116 |
"end": entity_end_position,
|
|
|
|
| 117 |
}
|
| 118 |
)
|
| 119 |
# assert (
|
| 120 |
# text[entity_start_position:entity_end_position] == original_string
|
| 121 |
# )
|
| 122 |
+
print(
|
| 123 |
+
text[entity_start_position:entity_end_position],
|
| 124 |
+
"------",
|
| 125 |
+
original_string,
|
| 126 |
+
)
|
| 127 |
idx += len(subtree)
|
| 128 |
|
| 129 |
# Update the current character position
|
|
|
|
| 155 |
return words_list, preds_list, confidence_list
|
| 156 |
|
| 157 |
|
|
|
|
|
|
|
| 158 |
# List of additional "strange" punctuation marks
|
| 159 |
additional_punctuation = "‘’“”„«»•–—―‣◦…§¶†‡‰′″〈〉"
|
| 160 |
|