Xerv-AI
/

CRAYON-tokenizer

Text Generation

hardware-accelerated

double-array-trie

Model card Files Files and versions

CRAYON-tokenizer / src / crayon / cli.py

Phase-Technologies's picture

Phase-Technologies

Upload folder using huggingface_hub

708f4a3 verified 4 days ago

history blame contribute delete

3.88 kB

	"""
	XERV Crayon CLI - Command Line Interface
	=========================================
	Provides command-line tools for benchmarking and vocabulary management.
	"""
	import sys
	import time
	import argparse


	def run_benchmark(argv=None):
	    """Run a quick benchmark of the Crayon tokenizer.

	    Parses the command-line options, loads the requested vocabulary
	    profile, tokenizes a test text repeatedly and prints timing and
	    throughput figures.

	    Args:
	        argv: Optional list of argument strings to parse. ``None`` (the
	            default) makes argparse read ``sys.argv[1:]``.

	    Returns:
	        ``0`` when the benchmark completes.

	    Raises:
	        SystemExit: With status 2 when the arguments are invalid
	            (including ``--iterations`` below 1), and with status 1 when
	            ``crayon`` cannot be imported or the profile fails to load.
	    """
	    parser = argparse.ArgumentParser(
	        prog='crayon-benchmark',
	        description='XERV Crayon Tokenizer Benchmark Tool'
	    )
	    parser.add_argument(
	        '--profile', '-p',
	        default='lite',
	        choices=['lite', 'standard'],
	        help='Vocabulary profile to use (default: lite)'
	    )
	    parser.add_argument(
	        '--iterations', '-n',
	        type=int,
	        default=10,
	        help='Number of benchmark iterations (default: 10)'
	    )
	    parser.add_argument(
	        '--text', '-t',
	        default=None,
	        help='Custom text to tokenize (default: built-in test text)'
	    )

	    args = parser.parse_args(argv)

	    # With zero (or negative) iterations no timings are collected and the
	    # averages below would divide by zero, so reject that up front.
	    if args.iterations < 1:
	        parser.error("--iterations must be a positive integer")

	    print("=" * 60)
	    print("XERV CRAYON TOKENIZER BENCHMARK")
	    print("=" * 60)

	    # Imported lazily so a broken installation yields a readable message
	    # instead of a traceback at module import time.
	    try:
	        from crayon import CrayonVocab
	    except ImportError as e:
	        print(f"[ERROR] Failed to import crayon: {e}")
	        print("Make sure xerv-crayon is properly installed.")
	        sys.exit(1)

	    # Load vocabulary
	    print(f"\n[INFO] Loading profile: {args.profile}")
	    start = time.perf_counter()

	    # Top-level CLI boundary: any load failure is reported and turned
	    # into exit status 1.
	    try:
	        vocab = CrayonVocab.load_profile(args.profile)
	    except Exception as e:
	        print(f"[ERROR] Failed to load profile: {e}")
	        sys.exit(1)

	    load_time = (time.perf_counter() - start) * 1000

	    if vocab.fast_mode:
	        print(f"[OK] Loaded with AVX2 engine ({load_time:.2f}ms)")
	    else:
	        print(f"[WARN] Loaded in fallback mode ({load_time:.2f}ms)")

	    # Prepare test text (an empty --text falls back to the built-in sample)
	    if args.text:
	        test_text = args.text
	    else:
	        test_text = """
	def matrix_multiply(A, B):
	# Standard O(n^3) matrix multiplication
	result = [[0 for _ in range(len(B[0]))] for _ in range(len(A))]
	for i in range(len(A)):
	for j in range(len(B[0])):
	for k in range(len(B)):
	result[i][j] += A[i][k] * B[k][j]
	return result

	The quick brown fox jumps over the lazy dog.
	Machine learning models require efficient tokenization for optimal performance.
	""" * 100  # Repeat for meaningful benchmark

	    text_size = len(test_text.encode('utf-8'))
	    print(f"\n[INFO] Test text size: {text_size:,} bytes ({text_size/1024:.1f} KB)")
	    print(f"[INFO] Iterations: {args.iterations}")

	    # Warmup (results are discarded; these runs are not timed)
	    print("\n[INFO] Warming up...")
	    for _ in range(2):
	        vocab.tokenize(test_text)

	    # Benchmark
	    print("[INFO] Running benchmark...")
	    times = []
	    token_counts = []

	    for _ in range(args.iterations):
	        start = time.perf_counter()
	        tokens = vocab.tokenize(test_text)
	        elapsed = time.perf_counter() - start
	        times.append(elapsed)
	        token_counts.append(len(tokens))

	    # Calculate metrics
	    avg_time = sum(times) / len(times)
	    min_time = min(times)
	    max_time = max(times)
	    avg_tokens = sum(token_counts) / len(token_counts)
	    if avg_time > 0:
	        tokens_per_sec = avg_tokens / avg_time
	        mb_per_sec = (text_size / 1024 / 1024) / avg_time
	    else:
	        # Every run finished below the timer's resolution; report an
	        # unbounded rate rather than crashing on a division by zero.
	        tokens_per_sec = float('inf')
	        mb_per_sec = float('inf')

	    # Print results
	    print("\n" + "=" * 60)
	    print("RESULTS")
	    print("=" * 60)
	    print(f" Profile: {args.profile}")
	    print(f" Token Count: {int(avg_tokens):,}")
	    print(f" Tokens/sec: {tokens_per_sec:,.0f}")
	    print(f" MB/sec: {mb_per_sec:.2f}")
	    print(f" Avg Time: {avg_time*1000:.2f}ms")
	    print(f" Min Time: {min_time*1000:.2f}ms")
	    print(f" Max Time: {max_time*1000:.2f}ms")
	    print("=" * 60)

	    return 0


	def main():
	    """Main entry point: run the benchmark and hand back its return value."""
	    exit_status = run_benchmark()
	    return exit_status


	if __name__ == "__main__":
	    # Use main()'s return value as the process exit status.
	    raise SystemExit(main())