File size: 3,439 Bytes
708f4a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
Test all code examples from README.md to ensure they work correctly.
"""
import sys
import os

# Make the locally built extension and the source tree importable.
# Prepending both entries with one slice assignment leaves "src" ahead of the
# build directory -- the same order two successive insert(0, ...) calls give.
sys.path[:0] = [
    os.path.join(os.getcwd(), "src"),
    os.path.join(os.getcwd(), "build", "lib.win-amd64-cpython-313"),
]

# Opening banner: rule, title, rule, then one blank line.
print("=" * 70, "TESTING README CODE EXAMPLES", "=" * 70, "", sep="\n")

# Test 1: the README "Quick Start" walkthrough
print("[TEST 1] Quick Start - Load Profile and Tokenize")
print("-" * 70)
try:
    from crayon.core.vocabulary import CrayonVocab

    vocab = CrayonVocab(device="auto")
    vocab.load_profile("lite")

    # Sample input containing specialized (Rust-style) syntax.
    snippet = 'fn main() { println!("Hello, World!"); }'
    token_ids = vocab.tokenize(snippet)

    # An AttributeError raised anywhere in this inner block (decode call or
    # the reporting that follows) downgrades the outcome to a partial pass.
    try:
        round_trip = vocab.decode(token_ids)
        print(f"βœ“ Tokenize: {snippet}")
        print(f"βœ“ Tokens: {token_ids}")
        print(f"βœ“ Decoded: {round_trip}")
        print("βœ“ TEST PASSED")
    except AttributeError:
        print("⚠ WARNING: vocab.decode() not implemented yet")
        print(f"βœ“ Tokenize works: {token_ids}")
        print("βœ“ TEST PARTIALLY PASSED")
except Exception as exc:
    # Any other error fails the section; show the message and the traceback.
    print(f"βœ— TEST FAILED: {exc}")
    import traceback
    traceback.print_exc()

print()

# Test 2: Load different profiles
# Each profile is loaded into a fresh CrayonVocab; one result line is printed
# per profile and a failure on one profile does not stop the next.
print("[TEST 2] Load Different Profiles")
print("-" * 70)
for profile_name in ["lite", "standard"]:
    try:
        # Import here instead of relying on the name bound inside Test 1's
        # try block: if that import failed, this section would otherwise
        # report a misleading NameError rather than the real import error.
        # Repeating the import per iteration is cheap (module cache).
        from crayon.core.vocabulary import CrayonVocab

        vocab = CrayonVocab(device="auto")
        vocab.load_profile(profile_name)
        print(f"βœ“ Loaded '{profile_name}' profile")
    except Exception as e:
        print(f"βœ— Failed to load '{profile_name}': {e}")

print()

# Test 3: DAT Builder Example
# Builds a DAT from a tiny vocabulary, saves it, and reports the node count.
print("[TEST 3] Compile Vocabulary to DAT Format")
print("-" * 70)
try:
    from crayon.c_ext.dat_builder import DATBuilder
    import json  # not used in this section; kept as it binds a module-level name
    import tempfile

    # Use a small test vocab
    test_vocab = ["hello", "world", "test", "python"]

    # Compile to DAT
    builder = DATBuilder()
    builder.build(test_vocab)

    # Save inside a private temporary directory. The context manager removes
    # the file even when save() or the reporting below raises, and the unique
    # directory keeps concurrent runs from clobbering each other's output.
    with tempfile.TemporaryDirectory() as tmp_dir:
        dat_path = os.path.join(tmp_dir, "test_readme.dat")
        builder.save(dat_path)

        print(f"βœ“ Built DAT with {builder.size} nodes")
        print(f"βœ“ Saved to {dat_path}")

    print("βœ“ TEST PASSED")
except Exception as e:
    # Report the failure with its traceback and let the script continue.
    print(f"βœ— TEST FAILED: {e}")
    import traceback
    traceback.print_exc()

print()

# Test 4: Direct C++ Engine Access
# Builds a small DAT, loads it into crayon_fast through an mmap, tokenizes a
# sentence, and reports the results.
print("[TEST 4] Direct C++ Engine Access")
print("-" * 70)
try:
    import mmap
    from crayon.c_ext import crayon_fast
    from crayon.c_ext.dat_builder import DATBuilder
    import tempfile

    # Build a small DAT
    test_vocab = ["the", "quick", "brown", "fox"]
    builder = DATBuilder()
    builder.build(test_vocab)

    dat_path = os.path.join(tempfile.gettempdir(), "test_engine.dat")
    try:
        builder.save(dat_path)

        # Zero-copy load via mmap
        # NOTE(review): the mapping is intentionally not closed here; the
        # engine presumably keeps reading from it after load_dat() -- confirm.
        with open(dat_path, "rb") as f:
            mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            size = crayon_fast.load_dat(mm)

        # Ultra-fast tokenization
        tokens = crayon_fast.tokenize("the quick brown fox")

        print(f"βœ“ Loaded DAT: {size} nodes")
        print(f"βœ“ Tokenized: {tokens}")
    finally:
        # Best-effort cleanup that runs on failure too, so the temp file does
        # not leak. Windows refuses to delete a file that still has a live
        # mapping; that is a cleanup problem, not a failure of the example,
        # so it is reported as a note instead of failing the test.
        try:
            os.unlink(dat_path)
        except FileNotFoundError:
            pass  # save() never created the file
        except OSError as cleanup_err:
            print(f"  (could not remove {dat_path}: {cleanup_err})")

    print("βœ“ TEST PASSED")
except Exception as e:
    # Report the failure with its traceback and let the script continue.
    print(f"βœ— TEST FAILED: {e}")
    import traceback
    traceback.print_exc()

print()
# Closing banner: rule, title, rule.
print("=" * 70, "README CODE TESTS COMPLETE", "=" * 70, sep="\n")