"""
CPU Backend Wrapper for Crayon
================================
This module wraps the compiled C++ extension for CPU tokenization.
Falls back to pure Python implementation if extension is not available.
"""
import sys
import os
import importlib.util
def _extension_module_name(ext_file):
    """Return the module name encoded in an extension filename.

    Compiled extensions are usually named ``<module>.<abi-tag>.<suffix>``
    (for example ``crayon_cpu.cpython-311-x86_64-linux-gnu.so``).  The
    interpreter looks up the init symbol from the part of the module name
    after its last dot, so the ABI tag must not be left in the name.
    """
    # Everything after the first dot is ABI tag and/or file suffix.
    return ext_file.split(".", 1)[0] or ext_file


def _find_extension_file(search_dirs, suffix, prefix=""):
    """Return ``(directory, filename)`` of the best candidate, or ``None``.

    Directories are tried in order; the first one containing at least one
    file that starts with *prefix* and ends with *suffix* wins.  Within that
    directory, files whose suffix matches the running interpreter's ABI are
    preferred, then shorter names, then alphabetical order, so the choice
    does not depend on the order ``os.listdir`` happens to return.
    """
    import importlib.machinery

    abi_suffixes = importlib.machinery.EXTENSION_SUFFIXES

    def preference(name):
        # Lower is better: position of the first interpreter suffix matched.
        for position, abi_suffix in enumerate(abi_suffixes):
            if name.endswith(abi_suffix):
                return (position, len(name), name)
        return (len(abi_suffixes), len(name), name)

    for directory in search_dirs:
        try:
            names = os.listdir(directory)
        except OSError:
            # Missing, unreadable, or not a directory: nothing to load here.
            continue
        candidates = [
            name for name in names
            if name.startswith(prefix) and name.endswith(suffix)
        ]
        if candidates:
            return directory, min(candidates, key=preference)
    return None


def _load_cpu_extension(search_dirs=None):
    """Locate and load the compiled CPU extension module.

    Args:
        search_dirs: Optional iterable of directories to search, in order.
            Defaults to this file's directory followed by its ``compiled``
            subdirectory.

    Returns:
        The loaded extension module object.

    Raises:
        ImportError: If no candidate file is found, or if the file that was
            found cannot be loaded by either loading strategy.
    """
    import importlib.machinery

    if search_dirs is None:
        # abspath guards against a relative __file__ whose dirname is ''.
        here = os.path.dirname(os.path.abspath(__file__))
        search_dirs = [here, os.path.join(here, "compiled")]
    else:
        search_dirs = list(search_dirs)

    # Windows builds use .pyd; Linux/macOS builds use .so.
    on_windows = sys.platform == "win32"
    suffix = ".pyd" if on_windows else ".so"

    found = _find_extension_file(search_dirs, suffix, prefix="crayon_cpu")
    if found is None and not on_windows:
        # Last resort on Linux/macOS: accept any .so file in the search dirs.
        found = _find_extension_file(search_dirs, suffix)
        if found is not None:
            print(f"π Found .so file: {found[1]}")
    if found is None:
        raise ImportError(f"No compiled CPU extension found ({suffix} file)")

    search_dir, ext_file = found
    ext_path = os.path.join(search_dir, ext_file)
    print(f"π Loading CPU extension from: {ext_path}")

    # Resolved before the try block so the fallback path can always use it.
    module_name = _extension_module_name(ext_file)

    try:
        # Kept from the original behaviour: expose the extension's directory
        # on sys.path before loading.
        ext_dir = os.path.dirname(ext_path)
        if ext_dir not in sys.path:
            sys.path.insert(0, ext_dir)

        spec = importlib.util.spec_from_file_location(module_name, ext_path)
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not create spec for {ext_path}")
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        print(f"✅ Successfully loaded compiled extension: {ext_file}")
        return mod
    except Exception as e:
        # Second attempt with an explicitly constructed extension loader.
        try:
            loader = importlib.machinery.ExtensionFileLoader(module_name, ext_path)
            spec = importlib.util.spec_from_file_location(module_name, ext_path, loader=loader)
            mod = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(mod)
            print(f"✅ Successfully loaded compiled extension (alt method): {ext_file}")
            return mod
        except Exception as e2:
            raise ImportError(
                f"Failed to load extension {ext_path}: {e}\nAlternative method failed: {e2}"
            ) from e2
# Pick the tokenizer backend: the compiled extension is preferred, and the
# pure Python module inside this package is used when it cannot be loaded.
try:
    _cpu_ext = _load_cpu_extension()
except ImportError as ext_error:
    print(f"β Compiled extension not available: {ext_error}")
    print("π Falling back to pure Python implementation (slower but functional)")

    # Second choice: the pure Python implementation shipped alongside this file.
    try:
        from . import crayon_cpu_fallback as _cpu_ext
    except ImportError as fallback_error:
        # Neither backend is importable; report both failures together.
        failure_report = (
            "Failed to load both compiled extension and pure Python fallback:\n"
            f"Extension error: {ext_error}\n"
            f"Fallback error: {fallback_error}\n"
            "This suggests a corrupted installation. Try reinstalling with:\n"
            " pip install --force-reinstall xerv-crayon"
        )
        raise ImportError(failure_report)
    else:
        print("β Pure Python fallback loaded successfully")
else:
    print("β Using compiled C++ extension for maximum performance")
# Public API: re-export the selected backend's entry points from this module.
tokenize = _cpu_ext.tokenize
load_dat = _cpu_ext.load_dat

# Hardware reporting is optional on the backend; provide a stub when the
# backend does not define it.
try:
    get_hardware_info = _cpu_ext.get_hardware_info
except AttributeError:
    def get_hardware_info():
        # Placeholder description used when the backend reports nothing.
        return "CPU Backend [Unknown]"

__all__ = ['tokenize', 'load_dat', 'get_hardware_info']