PyTorch
gpt2
achille-fusco committed on
Commit
dac8022
·
verified ·
1 Parent(s): bf3d960

Update tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +7 -8
tokenizer.py CHANGED
@@ -223,14 +223,7 @@ def remap_offsets_to_raw(offsets: List[Tuple[int,int]], pre2raw: List[Optional[i
223
  mapped.append((rs if rs is not None else 0, re_ if re_ is not None else 0))
224
  return mapped
225
 
226
- # ----------------------------
227
- # Public wrapper
228
- # ----------------------------
229
- class ParadigmTokenizerWrapper(PreTrainedTokenizerFast):
230
- slow_tokenizer_class = None
231
-
232
- # add near the top of ParadigmTokenizerWrapper
233
- def _coerce_to_str(x):
234
  # common cases first
235
  if isinstance(x, str):
236
  return x
@@ -259,6 +252,12 @@ class ParadigmTokenizerWrapper(PreTrainedTokenizerFast):
259
  return str(x)
260
  # final fallback
261
  return str(x)
 
 
 
 
 
 
262
 
263
  def __init__(self, *args, **kwargs):
264
  # ensure fast tokenizer is loaded directly (no slow->fast conversion)
 
223
  mapped.append((rs if rs is not None else 0, re_ if re_ is not None else 0))
224
  return mapped
225
 
226
+ def _coerce_to_str(x):
 
 
 
 
 
 
 
227
  # common cases first
228
  if isinstance(x, str):
229
  return x
 
252
  return str(x)
253
  # final fallback
254
  return str(x)
255
+
256
+ # ----------------------------
257
+ # Public wrapper
258
+ # ----------------------------
259
+ class ParadigmTokenizerWrapper(PreTrainedTokenizerFast):
260
+ slow_tokenizer_class = None
261
 
262
  def __init__(self, *args, **kwargs):
263
  # ensure fast tokenizer is loaded directly (no slow->fast conversion)