emanuelaboros committed on
Commit
e7392bf
·
1 Parent(s): 6deb831

move to pregenerated tokens - some bug with word ids -- move to the initial ones

Browse files
Files changed (1) hide show
  1. generic_ner.py +4 -1
generic_ner.py CHANGED
@@ -266,7 +266,10 @@ def realign(
266
  ):
267
  preds_list, words_list, confidence_list = [], [], []
268
  # word_ids = tokenizer(tokens, is_split_into_words=True).word_ids()
269
-
 
 
 
270
  for idx, word in enumerate(tokens):
271
  beginning_index = word_ids.index(idx)
272
  try:
 
266
  ):
267
  preds_list, words_list, confidence_list = [], [], []
268
  # word_ids = tokenizer(tokens, is_split_into_words=True).word_ids()
269
+ print('--'*20)
270
+ print("word_ids", word_ids)
271
+ print("tokens", tokens)
272
+ print('--'*20)
273
  for idx, word in enumerate(tokens):
274
  beginning_index = word_ids.index(idx)
275
  try: