"""Rust acceleration loader + Python fallback gate.

Importing this module exposes:

- ``accel``: the loaded Rust extension module, or ``None`` if unavailable.
- ``USE_RUST``: bool. ``True`` iff the Rust path should be used.
- ``prepare_mapping(mapping)``: returns a cached ``PreparedMapping``
  for the given ``PUAMapping`` instance, building it lazily on first use.
- ``build_tag()``: returns the Rust extension's build tag, or
  ``"python-fallback"`` when the Rust path is not in use.

The Rust path is used by default whenever the extension can be imported.
To force the pure-Python path (for parity testing or debugging), set
``CUTE_USE_PYTHON_PRETOKENIZER=1`` before importing :mod:`cute_tokenizer`.
"""
|
|
|
| from __future__ import annotations
|
|
|
| import contextlib
|
| import os
|
| import weakref
|
| from typing import Any
|
|
|
# Opt-out switch for the Rust path, read once at import time. The raw value is
# normalised (surrounding whitespace stripped, case folded) so that spellings
# such as "FALSE" or " 0 " count as "not set", exactly like "false" and "0".
# Any other non-empty value forces the pure-Python implementation.
_FORCE_PY: bool = (
    os.environ.get("CUTE_USE_PYTHON_PRETOKENIZER", "").strip().lower()
    not in {"", "0", "false"}
)

# The compiled extension is optional: if it cannot be imported, this module
# still imports cleanly and ``accel`` is left as ``None``.
try:
    from cute_tokenizer import _accel as _accel_module
except ImportError:
    _accel_module = None
    _ACCEL_AVAILABLE = False
else:
    _ACCEL_AVAILABLE = True

# Public handle to the extension module (``None`` when it is unavailable).
accel: Any = _accel_module

# The Rust path is used only when the extension loaded AND the user did not
# ask for the pure-Python path.
USE_RUST: bool = _ACCEL_AVAILABLE and not _FORCE_PY


# ``id(mapping)`` -> prepared handle. Entries are removed by ``_release``.
_prepared_cache: dict[int, Any] = {}
|
|
|
|
|
def _release(key: int) -> None:
    """Evict the cache entry stored under ``key``; a missing key is ignored."""
    try:
        del _prepared_cache[key]
    except KeyError:
        # Already evicted (or never stored): nothing to do.
        pass
|
|
|
|
|
# Strong references to mappings that cannot be weakly referenced. Their cache
# entries can never be evicted by a finalizer, so the mapping itself is kept
# alive to guarantee its ``id()`` is not recycled for a different object.
_pinned_mappings: dict[int, Any] = {}


def prepare_mapping(mapping: Any) -> Any:
    """Return a cached ``PreparedMapping`` for ``mapping``.

    The prepared form is built on the first call for a given mapping object
    from its ``word_to_pua`` and ``pua_to_word`` attributes; later calls with
    the same object return the cached handle. The cache is keyed by
    ``id(mapping)``.

    Cache lifetime:

    - If ``mapping`` supports weak references, a ``weakref.finalize``
      callback drops the cache entry when the mapping is garbage collected.
    - If it does not (``weakref.finalize`` raises ``TypeError``), the entry
      can never be evicted. The mapping is then pinned with a strong
      reference for the life of the process, because otherwise its ``id()``
      could be reused by an unrelated object that would receive this stale
      handle.

    Args:
        mapping: Object exposing ``word_to_pua`` and ``pua_to_word``.

    Returns:
        The ``accel.PreparedMapping`` built for ``mapping``.

    Raises:
        RuntimeError: If the Rust extension is not available.
    """
    if not _ACCEL_AVAILABLE:
        raise RuntimeError("cute_tokenizer._accel is not available")

    key = id(mapping)
    cached = _prepared_cache.get(key)
    if cached is not None:
        return cached

    prepared = accel.PreparedMapping(mapping.word_to_pua, mapping.pua_to_word)
    _prepared_cache[key] = prepared

    try:
        # Evict the entry as soon as the mapping dies so a recycled id can
        # never hit a stale handle.
        weakref.finalize(mapping, _release, key)
    except TypeError:
        # Not weak-referenceable: keep the mapping alive so ``key`` stays
        # unique to it for as long as the cache entry exists.
        _pinned_mappings[key] = mapping
    return prepared
|
|
|
|
|
def build_tag() -> str:
    """Report which implementation is active.

    Returns ``"python-fallback"`` when the Rust path is not in use. Otherwise
    returns the extension module's ``__build_tag__`` attribute, or
    ``"rust-unknown"`` if the extension does not define one.
    """
    if not USE_RUST:
        return "python-fallback"
    return getattr(accel, "__build_tag__", "rust-unknown")
|
|
|
|
|
# Public names exported by a star-import of this module; the underscore-prefixed
# helpers above are intentionally left out.
__all__ = ["USE_RUST", "accel", "build_tag", "prepare_mapping"]
|
|
|