Commit
·
e7392bf
1
Parent(s):
6deb831
move to pregenerated tokens - some bug with word ids -- move to the initial ones
Browse files- generic_ner.py +4 -1
generic_ner.py
CHANGED
|
@@ -266,7 +266,10 @@ def realign(
|
|
| 266 |
):
|
| 267 |
preds_list, words_list, confidence_list = [], [], []
|
| 268 |
# word_ids = tokenizer(tokens, is_split_into_words=True).word_ids()
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
| 270 |
for idx, word in enumerate(tokens):
|
| 271 |
beginning_index = word_ids.index(idx)
|
| 272 |
try:
|
|
|
|
| 266 |
):
|
| 267 |
preds_list, words_list, confidence_list = [], [], []
|
| 268 |
# word_ids = tokenizer(tokens, is_split_into_words=True).word_ids()
|
| 269 |
+
print('--'*20)
|
| 270 |
+
print("word_ids", word_ids)
|
| 271 |
+
print("tokens", tokens)
|
| 272 |
+
print('--'*20)
|
| 273 |
for idx, word in enumerate(tokens):
|
| 274 |
beginning_index = word_ids.index(idx)
|
| 275 |
try:
|