File size: 6,685 Bytes
708f4a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
"""
XERV CRAYON SETUP v5.3.6 - WITH C++ EXTENSIONS
==============================================
Builds native extensions for maximum performance on CPU (AVX2), CUDA, and ROCm
"""

import os
import sys
import platform
import shutil
import sysconfig
import subprocess
from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext

# Package version string handed to setup(version=...) below.
VERSION = "5.3.6"

class CustomBuildExt(build_ext):
    """Best-effort ``build_ext`` that can also compile CUDA sources with nvcc.

    Extensions whose name ends in ``_cuda`` are compiled and linked by
    invoking ``nvcc`` directly; every other extension goes through the stock
    ``build_ext`` implementation.  A failure to build any single extension
    is reported as a printed warning instead of aborting the whole build, so
    the package can still be installed when a compiler or the CUDA toolkit
    is missing.
    """

    # Compute capabilities targeted when CRAYON_CUDA_ARCHS is not set.
    DEFAULT_CUDA_ARCHS = ('70', '75', '80', '86', '89', '90')

    def build_extension(self, ext):
        """Build ``ext``, downgrading any build error to a printed warning.

        Args:
            ext: The ``Extension`` to build.  Names ending in ``_cuda`` are
                routed to :meth:`_build_cuda_extension`.
        """
        try:
            if ext.name.endswith('_cuda'):
                self._build_cuda_extension(ext)
            else:
                super().build_extension(ext)
        except Exception as e:
            # Deliberate best-effort: a missing toolchain must not break
            # the installation of the pure-Python package.
            print(f"Warning: Failed to build {ext.name}: {e}")
        else:
            print(f"Successfully built: {ext.name}")

    def _cuda_arch_flags(self):
        """Return the ``-gencode`` flags passed to nvcc.

        The target list comes from the ``CRAYON_CUDA_ARCHS`` environment
        variable (comma- or space-separated, e.g. ``"75,80"`` or
        ``"7.5 8.0"``) and falls back to ``DEFAULT_CUDA_ARCHS``.  The
        override exists because nvcc rejects architectures it does not
        know, which would otherwise fail the whole CUDA build.
        """
        raw = os.environ.get('CRAYON_CUDA_ARCHS', '')
        archs = [a.replace('.', '') for a in raw.replace(',', ' ').split()]
        if not archs:
            archs = list(self.DEFAULT_CUDA_ARCHS)
        return [f'-gencode=arch=compute_{a},code=sm_{a}' for a in archs]

    def _build_cuda_extension(self, ext):
        """Compile and link a CUDA extension by calling ``nvcc`` directly.

        Every file in ``ext.sources`` is compiled to an object file under
        ``self.build_temp`` and the objects are linked into the shared
        library at ``self.get_ext_fullpath(ext.name)``.

        Args:
            ext: The ``Extension`` describing the CUDA module.

        Raises:
            RuntimeError: If nvcc cannot be located or ``ext`` has no sources.
            subprocess.CalledProcessError: If an nvcc invocation exits with
                a non-zero status.
        """
        cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
        nvcc = shutil.which('nvcc') or (os.path.join(cuda_home, 'bin', 'nvcc') if cuda_home else None)

        if not nvcc or not os.path.exists(nvcc):
            raise RuntimeError("NVCC not found")
        if not ext.sources:
            raise RuntimeError(f"No sources listed for {ext.name}")

        nvcc_dir = os.path.dirname(nvcc)

        # Scratch directory for object files
        build_temp = os.path.join(self.build_temp, ext.name)
        os.makedirs(build_temp, exist_ok=True)

        # Link straight to the final location.  Going through an
        # intermediate file named after the dotted extension name would
        # leave a stray, unimportable library inside build_lib.
        lib_file = self.get_ext_fullpath(ext.name)
        os.makedirs(os.path.dirname(lib_file), exist_ok=True)

        # Include directories: Python headers, CUDA headers next to nvcc,
        # plus whatever the Extension itself declares.
        include_dirs = [
            sysconfig.get_paths()['include'],
            os.path.join(nvcc_dir, '..', 'include'),
            *(ext.include_dirs or []),
        ]
        include_flags = [f'-I{d}' for d in include_dirs if os.path.exists(d)]

        gpu_arch_flags = self._cuda_arch_flags()

        # Compile each CUDA source to an object file.  Commands are passed
        # as argument lists (no shell) so paths containing spaces or shell
        # metacharacters are handled verbatim.
        obj_files = []
        for cuda_src in ext.sources:
            stem = os.path.splitext(os.path.basename(cuda_src))[0]
            obj_file = os.path.join(build_temp, stem + '.o')
            compile_cmd = [
                nvcc, '-c', cuda_src, '-o', obj_file,
                *include_flags,
                '-O3', '--compiler-options', '-fPIC', '-std=c++17',
                *gpu_arch_flags,
            ]
            print(f"Compiling CUDA extension: {' '.join(compile_cmd)}")
            subprocess.check_call(compile_cmd)
            obj_files.append(obj_file)

        # Link into a shared library against the CUDA runtime.  Toolkits
        # ship their libraries in either lib64 or lib, so use whichever
        # directory is actually present.
        lib_dirs = [os.path.join(nvcc_dir, '..', name) for name in ('lib64', 'lib')]
        lib_flags = [f'-L{d}' for d in lib_dirs if os.path.isdir(d)]
        link_cmd = [nvcc, '-shared', *obj_files, '-o', lib_file, *lib_flags, '-lcudart']

        print(f"Linking CUDA extension: {' '.join(link_cmd)}")
        subprocess.check_call(link_cmd)

def _cpu_compile_args(is_cpp):
    """Return the compiler flags for the CPU extension on this platform.

    Args:
        is_cpp: True when the sources are C++.  The C++17 standard flag is
            only emitted in that case, because C compilers warn about it
            (gcc) or reject it outright (clang).

    Returns:
        A new list of compiler flag strings.
    """
    if platform.system() == 'Windows':
        args = ['/O2', '/W3', '/wd4244', '/wd4267']
        if is_cpp:
            args.insert(1, '/std:c++17')
        return args

    args = ['-O3', '-fPIC', '-Wall']
    if is_cpp:
        args.insert(1, '-std=c++17')
    # platform.machine() casing differs between systems ("x86_64", "amd64",
    # "AMD64"), so compare case-insensitively.
    if platform.machine().lower() in ('x86_64', 'amd64'):
        args.extend(['-mavx2', '-mfma'])
    return args


def get_extensions():
    """Return the list of native ``Extension`` objects to build.

    The CPU extension is built from ``cpu_engine.cpp`` when that file
    exists, otherwise from the C fallback (``crayon_module.c`` plus
    ``simd_ops.c``).  On non-Windows systems a CUDA extension is added when
    nvcc and ``gpu_engine_cuda.cu`` are both present and the
    ``CRAYON_SKIP_CUDA`` environment variable is unset or empty.

    Returns:
        A possibly empty list of ``Extension`` instances.
    """
    extensions = []

    # Source paths are relative to the current working directory, which is
    # expected to be the directory containing setup.py.
    c_ext_dir = os.path.join("src", "crayon", "c_ext")

    # CPU EXTENSION
    cpu_engine_path = os.path.join(c_ext_dir, "cpu_engine.cpp")
    crayon_module_path = os.path.join(c_ext_dir, "crayon_module.c")
    simd_ops_path = os.path.join(c_ext_dir, "simd_ops.c")

    if os.path.exists(cpu_engine_path):
        cpu_sources, cpu_language = [cpu_engine_path], 'c++'
    elif os.path.exists(crayon_module_path):
        # Pure C fallback: must be declared as C so no C++-only flags are
        # handed to the C compiler.
        cpu_sources, cpu_language = [crayon_module_path, simd_ops_path], 'c'
    else:
        cpu_sources, cpu_language = [], None

    if cpu_sources:
        cpu_ext = Extension(
            'crayon.c_ext.crayon_cpu',
            sources=cpu_sources,
            include_dirs=[c_ext_dir],
            extra_compile_args=_cpu_compile_args(cpu_language == 'c++'),
            language=cpu_language,
        )
        extensions.append(cpu_ext)

    # CUDA EXTENSION (skipped on Windows - requires nvcc)
    if platform.system() != 'Windows':
        cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
        nvcc = shutil.which('nvcc') or (os.path.join(cuda_home, 'bin', 'nvcc') if cuda_home else None)
        cuda_src = os.path.join(c_ext_dir, "gpu_engine_cuda.cu")

        if nvcc and os.path.exists(nvcc) and os.path.exists(cuda_src) and not os.environ.get('CRAYON_SKIP_CUDA'):
            cuda_ext = Extension(
                'crayon.c_ext.crayon_cuda',
                sources=[cuda_src],
                include_dirs=[c_ext_dir],
                language='c++'
            )
            extensions.append(cuda_ext)
            print(f"CUDA extension configured (NVCC: {nvcc})")

    return extensions

# ``--no-extensions`` is a private command-line switch: when it is present
# no native extension is configured and the flag is stripped from argv
# before setup() parses the command line.
extensions = []
build_extensions = '--no-extensions' not in sys.argv

if not build_extensions:
    sys.argv.remove('--no-extensions')
else:
    try:
        extensions = get_extensions()
    except Exception as exc:
        # Best-effort: fall back to a pure-Python install; ``extensions``
        # still holds the empty list assigned above.
        print(f"Extension setup failed: {exc}")

# Read the long description inside a ``with`` block so the file handle is
# closed right away instead of being left open until garbage collection.
with open("README.md", encoding="utf-8") as _readme_file:
    _long_description = _readme_file.read()

# Package metadata and build configuration.  ``extensions`` may be empty,
# in which case only the Python package and its data files are built.
setup(
    name="xerv-crayon",
    version=VERSION,
    author="Xerv Research Engineering Division",
    description="Omni-Backend Tokenizer - CPU (AVX2/512), CUDA (NVIDIA), ROCm (AMD)",
    long_description=_long_description,
    long_description_content_type="text/markdown",
    packages=find_packages("src"),
    package_dir={"": "src"},
    python_requires=">=3.8,<3.14",
    install_requires=["numpy>=1.21.0"],
    ext_modules=extensions,
    # Route every extension through the best-effort builder.
    cmdclass={'build_ext': CustomBuildExt},
    # Ship data resources and the native sources alongside the package.
    package_data={
        "crayon": [
            "resources/dat/*.dat",
            "resources/dat/*.json",
            "resources/*.txt",
            "c_ext/*.h",
            "c_ext/*.c",
            "c_ext/*.cpp",
            "c_ext/*.cu",
            "c_ext/*.hip",
        ]
    },
    include_package_data=True,
)