Update tokenizer.py
Browse files - tokenizer.py +7 -8
tokenizer.py
CHANGED
|
@@ -223,14 +223,7 @@ def remap_offsets_to_raw(offsets: List[Tuple[int,int]], pre2raw: List[Optional[i
|
|
| 223 |
mapped.append((rs if rs is not None else 0, re_ if re_ is not None else 0))
|
| 224 |
return mapped
|
| 225 |
|
| 226 |
-
|
| 227 |
-
# Public wrapper
|
| 228 |
-
# ----------------------------
|
| 229 |
-
class ParadigmTokenizerWrapper(PreTrainedTokenizerFast):
|
| 230 |
-
slow_tokenizer_class = None
|
| 231 |
-
|
| 232 |
-
# add near the top of ParadigmTokenizerWrapper
|
| 233 |
-
def _coerce_to_str(x):
|
| 234 |
# common cases first
|
| 235 |
if isinstance(x, str):
|
| 236 |
return x
|
|
@@ -259,6 +252,12 @@ class ParadigmTokenizerWrapper(PreTrainedTokenizerFast):
|
|
| 259 |
return str(x)
|
| 260 |
# final fallback
|
| 261 |
return str(x)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
def __init__(self, *args, **kwargs):
|
| 264 |
# ensure fast tokenizer is loaded directly (no slow->fast conversion)
|
|
|
|
| 223 |
mapped.append((rs if rs is not None else 0, re_ if re_ is not None else 0))
|
| 224 |
return mapped
|
| 225 |
|
| 226 |
+
def _coerce_to_str(x):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
# common cases first
|
| 228 |
if isinstance(x, str):
|
| 229 |
return x
|
|
|
|
| 252 |
return str(x)
|
| 253 |
# final fallback
|
| 254 |
return str(x)
|
| 255 |
+
|
| 256 |
+
# ----------------------------
|
| 257 |
+
# Public wrapper
|
| 258 |
+
# ----------------------------
|
| 259 |
+
class ParadigmTokenizerWrapper(PreTrainedTokenizerFast):
|
| 260 |
+
slow_tokenizer_class = None
|
| 261 |
|
| 262 |
def __init__(self, *args, **kwargs):
|
| 263 |
# ensure fast tokenizer is loaded directly (no slow->fast conversion)
|