"""
Test all code examples from README.md to ensure they work correctly.
"""
import sys
import os
# Make the locally built extension and the in-tree sources importable.
# Every entry is pushed to the front of sys.path, so "src" (inserted last)
# ends up ahead of the build directory in the search order.
for _parts in (("build", "lib.win-amd64-cpython-313"), ("src",)):
    sys.path.insert(0, os.path.join(os.getcwd(), *_parts))

# Opening banner for the whole run.
_RULE = "=" * 70
print(_RULE)
print("TESTING README CODE EXAMPLES")
print(_RULE)
print()
# Test 1: Quick Start Example
# Loads the "lite" profile, tokenizes a short code snippet and, when the
# vocabulary object offers decode(), decodes the tokens again.
# NOTE(review): the "β" prefix in the messages below looks like a
# mis-decoded status glyph - confirm against the original file.
print("[TEST 1] Quick Start - Load Profile and Tokenize")
print("-" * 70)
try:
    from crayon.core.vocabulary import CrayonVocab

    vocab = CrayonVocab(device="auto")
    vocab.load_profile("lite")

    # Tokenize specialized syntax
    code_snippet = "fn main() { println!(\"Hello, World!\"); }"
    tokens = vocab.tokenize(code_snippet)

    # Check if decode works; an AttributeError is reported as a partial pass.
    try:
        decoded = vocab.decode(tokens)
        print(
            "\n".join(
                [
                    f"β Tokenize: {code_snippet}",
                    f"β Tokens: {tokens}",
                    f"β Decoded: {decoded}",
                    "β TEST PASSED",
                ]
            )
        )
    except AttributeError:
        print(
            "\n".join(
                [
                    "β WARNING: vocab.decode() not implemented yet",
                    f"β Tokenize works: {tokens}",
                    "β TEST PARTIALLY PASSED",
                ]
            )
        )
except Exception as exc:
    # Any other failure (including a failed import) is reported with a
    # traceback; the script then carries on with the next test.
    import traceback

    print(f"β TEST FAILED: {exc}")
    traceback.print_exc()
print()
# Test 2: Load different profiles
# Creates a fresh vocabulary per profile name and reports whether
# load_profile() succeeded; a failure on one profile does not stop the loop.
print("[TEST 2] Load Different Profiles")
print("-" * 70)
for profile_name in ["lite", "standard"]:
    try:
        # Import inside this section instead of relying on Test 1 having
        # bound the name: if that earlier import failed, this loop would
        # otherwise report a misleading NameError for every profile.
        from crayon.core.vocabulary import CrayonVocab

        vocab = CrayonVocab(device="auto")
        vocab.load_profile(profile_name)
        print(f"β Loaded '{profile_name}' profile")
    except Exception as e:
        print(f"β Failed to load '{profile_name}': {e}")
print()
# Test 3: DAT Builder Example
# Builds a DAT from a four-word vocabulary, saves it to a file in the system
# temp directory, reports the builder's size and removes the file again.
print("[TEST 3] Compile Vocabulary to DAT Format")
print("-" * 70)
try:
    from crayon.c_ext.dat_builder import DATBuilder
    import json  # not used in this section; kept from the original example
    import tempfile

    # Use a small test vocab
    test_vocab = ["hello", "world", "test", "python"]

    # Compile to DAT
    builder = DATBuilder()
    builder.build(test_vocab)

    # Save to temp file
    dat_path = os.path.join(tempfile.gettempdir(), "test_readme.dat")
    try:
        builder.save(dat_path)
        print(f"β Built DAT with {builder.size} nodes")
        print(f"β Saved to {dat_path}")
    finally:
        # Remove the temp file even when save() or the reporting above
        # raises, so a failed run does not leave the file behind.
        if os.path.exists(dat_path):
            os.unlink(dat_path)
    print("β TEST PASSED")
except Exception as e:
    print(f"β TEST FAILED: {e}")
    import traceback

    traceback.print_exc()
print()
# Test 4: Direct C++ Engine Access
# Builds a small DAT, memory-maps the saved file, hands the mapping to
# crayon_fast.load_dat() and tokenizes a sentence through crayon_fast.
print("[TEST 4] Direct C++ Engine Access")
print("-" * 70)
try:
    import mmap
    from crayon.c_ext import crayon_fast
    from crayon.c_ext.dat_builder import DATBuilder
    import tempfile

    # Build a small DAT
    test_vocab = ["the", "quick", "brown", "fox"]
    builder = DATBuilder()
    builder.build(test_vocab)
    dat_path = os.path.join(tempfile.gettempdir(), "test_engine.dat")
    try:
        builder.save(dat_path)

        # Zero-copy load via mmap
        # NOTE(review): the mapping is never closed. Whether it can be closed
        # safely depends on whether crayon_fast keeps referencing the buffer
        # after load_dat() - confirm. On Windows an open mapping may also
        # make the unlink below fail - confirm.
        with open(dat_path, "rb") as f:
            mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            size = crayon_fast.load_dat(mm)

        # Ultra-fast tokenization
        tokens = crayon_fast.tokenize("the quick brown fox")
        print(f"β Loaded DAT: {size} nodes")
        print(f"β Tokenized: {tokens}")
    finally:
        # Remove the temp file even when loading or tokenizing raises, so a
        # failed run does not leave the file behind.
        if os.path.exists(dat_path):
            os.unlink(dat_path)
    print("β TEST PASSED")
except Exception as e:
    print(f"β TEST FAILED: {e}")
    import traceback

    traceback.print_exc()
print()
# Closing banner: the title line framed by two 70-character rules.
_CLOSING_RULE = "=" * 70
print(_CLOSING_RULE)
print("README CODE TESTS COMPLETE")
print(_CLOSING_RULE)
|