| """ |
| Test all code examples from README.md to ensure they work correctly. |
| """ |
| import sys |
| import os |
|
|
| |
# Make the in-tree package importable without installing it.  Paths are
# resolved against the current working directory, so this script is expected
# to be launched from the repository root.
import sysconfig

_repo_root = os.getcwd()
_build_root = os.path.join(_repo_root, "build")

# Build output directory the script was originally written against.
_build_dirs = [os.path.join(_build_root, "lib.win-amd64-cpython-313")]

# Also consider the build directory for the interpreter that is running now,
# so the compiled extension can be found on other platforms / Python versions.
# NOTE(review): assumes the setuptools "lib.<platform>-<cache_tag>" naming
# scheme -- confirm against the build tooling used by this project.
_cache_tag = sys.implementation.cache_tag
if _cache_tag is not None:
    _native_build_dir = os.path.join(
        _build_root, f"lib.{sysconfig.get_platform()}-{_cache_tag}"
    )
    if _native_build_dir not in _build_dirs:
        _build_dirs.append(_native_build_dir)

# Each insert goes to the front, so the last one wins: "src" ends up first,
# followed by the native build directory, then the original hard-coded one.
for _build_dir in _build_dirs:
    sys.path.insert(0, _build_dir)
sys.path.insert(0, os.path.join(_repo_root, "src"))
|
|
# Opening banner: a 70-character rule above and below the title, followed by
# one blank line.
_banner_rule = "=" * 70
print(_banner_rule, "TESTING README CODE EXAMPLES", _banner_rule, "", sep="\n")
|
|
| |
# TEST 1 -- README "Quick Start": load the "lite" profile, tokenize a code
# snippet and, if the vocabulary object exposes decode(), decode the tokens
# again.  Any exception is reported together with a traceback; the script
# keeps running either way.
print("[TEST 1] Quick Start - Load Profile and Tokenize")
print("-" * 70)
try:
    from crayon.core.vocabulary import CrayonVocab

    vocab = CrayonVocab(device="auto")
    vocab.load_profile("lite")

    code_snippet = "fn main() { println!(\"Hello, World!\"); }"
    tokens = vocab.tokenize(code_snippet)

    # Probe for decode() instead of catching AttributeError around the call:
    # an AttributeError raised *inside* an existing decode() is a real failure
    # and must reach the outer handler rather than be reported as
    # "not implemented".
    decode = getattr(vocab, "decode", None)
    if decode is None:
        print("⚠ WARNING: vocab.decode() not implemented yet")
        print(f"✓ Tokenize works: {tokens}")
        print("⚠ TEST PARTIALLY PASSED")
    else:
        decoded = decode(tokens)
        print(f"✓ Tokenize: {code_snippet}")
        print(f"✓ Tokens: {tokens}")
        print(f"✓ Decoded: {decoded}")
        print("✓ TEST PASSED")
except Exception as e:
    print(f"✗ TEST FAILED: {e}")
    import traceback
    traceback.print_exc()

print()
|
|
| |
# TEST 2 -- load each named profile into a fresh CrayonVocab and report the
# outcome per profile.
print("[TEST 2] Load Different Profiles")
print("-" * 70)
try:
    # Imported here so this test does not depend on an earlier test having
    # imported the class successfully; an import problem is reported once
    # instead of surfacing as a NameError for every profile.
    from crayon.core.vocabulary import CrayonVocab
except Exception as e:
    print(f"✗ TEST FAILED: could not import CrayonVocab: {e}")
else:
    for profile_name in ("lite", "standard"):
        try:
            vocab = CrayonVocab(device="auto")
            vocab.load_profile(profile_name)
            print(f"✓ Loaded '{profile_name}' profile")
        except Exception as e:
            # One failing profile must not stop the remaining ones.
            print(f"✗ Failed to load '{profile_name}': {e}")

print()
|
|
| |
# TEST 3 -- build a DAT from a tiny vocabulary with DATBuilder, save it to a
# file in the system temp directory and report the node count.  The file is
# removed afterwards whether or not the test succeeded.
print("[TEST 3] Compile Vocabulary to DAT Format")
print("-" * 70)
import tempfile

dat_path = os.path.join(tempfile.gettempdir(), "test_readme.dat")
try:
    from crayon.c_ext.dat_builder import DATBuilder

    test_vocab = ["hello", "world", "test", "python"]

    builder = DATBuilder()
    builder.build(test_vocab)
    builder.save(dat_path)

    # Explicitly verify that save() produced a file (previously this was only
    # checked implicitly by os.unlink raising on a missing file).
    if not os.path.isfile(dat_path):
        raise FileNotFoundError(f"DATBuilder.save() did not create {dat_path}")

    print(f"✓ Built DAT with {builder.size} nodes")
    print(f"✓ Saved to {dat_path}")
    print("✓ TEST PASSED")
except Exception as e:
    print(f"✗ TEST FAILED: {e}")
    import traceback
    traceback.print_exc()
finally:
    # Best-effort cleanup: never leave the temp file behind, and never let a
    # cleanup problem abort the rest of the script.
    try:
        os.unlink(dat_path)
    except FileNotFoundError:
        pass
    except OSError as cleanup_error:
        print(f"⚠ WARNING: could not remove {dat_path}: {cleanup_error}")

print()
|
|
| |
# TEST 4 -- drive the compiled engine directly: build and save a small DAT,
# memory-map the file, hand the mapping to crayon_fast.load_dat() and tokenize
# a sentence.  The mapping and the temp file are released afterwards whether
# or not the test succeeded.
print("[TEST 4] Direct C++ Engine Access")
print("-" * 70)
import tempfile

dat_path = os.path.join(tempfile.gettempdir(), "test_engine.dat")
mm = None
try:
    import mmap
    from crayon.c_ext import crayon_fast
    from crayon.c_ext.dat_builder import DATBuilder

    test_vocab = ["the", "quick", "brown", "fox"]
    builder = DATBuilder()
    builder.build(test_vocab)
    builder.save(dat_path)

    with open(dat_path, "rb") as f:
        # The mapping stays usable after the file object is closed.
        mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        size = crayon_fast.load_dat(mm)

    tokens = crayon_fast.tokenize("the quick brown fox")

    print(f"✓ Loaded DAT: {size} nodes")
    print(f"✓ Tokenized: {tokens}")
    print("✓ TEST PASSED")
except Exception as e:
    print(f"✗ TEST FAILED: {e}")
    import traceback
    traceback.print_exc()
finally:
    # Release the mapping before deleting the file: on Windows a file with an
    # open mapping cannot be removed.
    # NOTE(review): presumably the engine reads from this mapping, so it must
    # not be asked to tokenize after this point -- confirm against load_dat().
    if mm is not None:
        try:
            mm.close()
        except BufferError:
            # Something still holds an exported buffer of the mapping; leave it
            # open and let the unlink below report if the file cannot be removed.
            pass
    try:
        os.unlink(dat_path)
    except FileNotFoundError:
        pass
    except OSError as cleanup_error:
        print(f"⚠ WARNING: could not remove {dat_path}: {cleanup_error}")

print()
|
|
# Closing banner: one blank line, then the title framed by two 70-character
# rules.
_footer_rule = "=" * 70
print("", _footer_rule, "README CODE TESTS COMPLETE", _footer_rule, sep="\n")
|
|