RWKV-ScaleLens / core /segmentation.py
Jellyfish042's picture
feat: support RWKV model A/B selection and space-ready model handling
f5e1a93
raw
history blame contribute delete
593 Bytes
"""
Fallback segmentation utilities.
Used for offline tests or snapshot generation when model tokenizers
are unavailable.
"""
from typing import Dict, List
def fallback_token_info(text: str) -> Dict[str, List]:
    """Build a tokenizer-free token-info mapping for ``text``.

    ``common_boundaries`` holds the cumulative UTF-8 byte offsets of
    ``text``: it starts at 0 and gains one entry per code point, so the
    last entry equals the total encoded length. The token lists and the
    byte-to-token maps are returned empty.
    """
    # Each new offset is the previous one plus the encoded width of the
    # current code point.
    offsets = [0]
    for char in text:
        offsets.append(offsets[-1] + len(char.encode("utf-8")))

    info = {
        "common_boundaries": offsets,
        "qwen_tokens": [],
        "rwkv_tokens": [],
        "byte_to_qwen": {},
        "byte_to_rwkv": {},
    }
    return info