# -*- mode: python ; coding: utf-8 -*- import sys import os from PyInstaller.utils.hooks import collect_all, collect_data_files, collect_submodules from pathlib import Path import glob import importlib.util import sysconfig block_cipher = None # Collect all data and binaries from critical packages datas = [] binaries = [] hiddenimports = [] # Function to detect conda environment and CUDA version def get_conda_cuda_libs(): """Detect and collect CUDA/cuDNN libraries from the active conda environment""" cuda_binaries = [] # Get the conda environment path conda_prefix = os.environ.get('CONDA_PREFIX', sys.prefix) print(f"Conda environment detected: {conda_prefix}") # Detect CUDA version from environment path or libraries cuda_version = None if 'cu118' in conda_prefix or 'cuda118' in conda_prefix: cuda_version = '11.8' elif 'cu122' in conda_prefix or 'cuda122' in conda_prefix: cuda_version = '12.2' elif 'cu128' in conda_prefix or 'cuda128' in conda_prefix: cuda_version = '12.8' else: # Try to detect from cudart version cudart_files = glob.glob(os.path.join(conda_prefix, 'lib', 'libcudart.so.*')) if cudart_files: cudart_file = os.path.basename(cudart_files[0]) if '11.8' in cudart_file: cuda_version = '11.8' elif '12.2' in cudart_file: cuda_version = '12.2' elif '12.8' in cudart_file: cuda_version = '12.8' print(f"Detected CUDA version: {cuda_version}") # Library paths to check - Windows uses different paths than Linux if sys.platform == 'win32': lib_dirs = [ os.path.join(conda_prefix, 'Library', 'bin'), # Primary location for Windows DLLs os.path.join(conda_prefix, 'bin'), # Alternative location os.path.join(conda_prefix, 'DLLs'), # Python DLLs location ] # Also check Python site-packages for ONNX Runtime libraries import site site_packages = site.getsitepackages() for sp in site_packages: if conda_prefix in sp: onnx_capi_path = os.path.join(sp, 'onnxruntime', 'capi') if os.path.exists(onnx_capi_path): lib_dirs.append(onnx_capi_path) print(f" Added ONNX Runtime path: {onnx_capi_path}") # Windows CUDA library patterns with version numbers cuda_libs_patterns = [ # CUDA Runtime 'cudart64_*.dll', 'cudart32_*.dll', # 32-bit variant if exists # cuBLAS 'cublas64_*.dll', 'cublasLt64_*.dll', # cuDNN libraries - critical for deep learning 'cudnn64_*.dll', 'cudnn_ops_infer64_*.dll', 'cudnn_ops_train64_*.dll', 'cudnn_cnn_infer64_*.dll', 'cudnn_cnn_train64_*.dll', 'cudnn_adv_infer64_*.dll', 'cudnn_adv_train64_*.dll', # For newer cuDNN versions (9.x) 'cudnn*.dll', # cuFFT 'cufft64_*.dll', 'cufftw64_*.dll', # cuRAND 'curand64_*.dll', # cuSPARSE 'cusparse64_*.dll', # cuSOLVER 'cusolver64_*.dll', 'cusolverMg64_*.dll', # NVRTC 'nvrtc64_*.dll', 'nvrtc-builtins64_*.dll', # NVIDIA Tools Extension 'nvToolsExt64_*.dll', # Additional potential libraries 'nppc64_*.dll', 'nppif64_*.dll', 'npps64_*.dll', # ONNX Runtime GPU dependencies (important!) 'onnxruntime_providers_cuda.dll', 'onnxruntime_providers_tensorrt.dll', 'onnxruntime_providers_shared.dll', # Python binding for ONNX Runtime 'onnxruntime_pybind11_state*.pyd', ] else: # Linux/Unix library paths lib_dirs = [ os.path.join(conda_prefix, 'lib'), os.path.join(conda_prefix, 'lib', 'stubs'), ] # Also check Python site-packages for ONNX Runtime libraries # This is crucial for finding libonnxruntime_providers_cuda.so, etc. import site # Try to get the site-packages directory in the conda environment python_version = f"python{sys.version_info.major}.{sys.version_info.minor}" site_packages_paths = [ os.path.join(conda_prefix, 'lib', python_version, 'site-packages'), os.path.join(conda_prefix, 'lib', 'python3.10', 'site-packages'), # Fallback for CI os.path.join(conda_prefix, 'lib', 'python3.11', 'site-packages'), # Alternative version ] for sp_path in site_packages_paths: onnx_capi_path = os.path.join(sp_path, 'onnxruntime', 'capi') if os.path.exists(onnx_capi_path): lib_dirs.append(onnx_capi_path) print(f" Added ONNX Runtime path: {onnx_capi_path}") break # Linux CUDA library patterns cuda_libs_patterns = [ 'libcudart.so*', 'libcublas.so*', 'libcublasLt.so*', 'libcudnn*.so*', 'libcufft.so*', 'libcufftw.so*', 'libcurand.so*', 'libcusparse.so*', 'libcusolver.so*', 'libnvrtc.so*', 'libnvToolsExt.so*', # ONNX Runtime GPU dependencies 'libonnxruntime_providers_cuda.so*', 'libonnxruntime_providers_tensorrt.so*', 'libonnxruntime_providers_shared.so*', # Also check without 'lib' prefix (for files in capi directory) 'onnxruntime_providers_cuda.so*', 'onnxruntime_providers_tensorrt.so*', 'onnxruntime_providers_shared.so*', # Python extension module 'onnxruntime_pybind11_state*.so', ] # Collect all matching libraries for lib_dir in lib_dirs: if not os.path.exists(lib_dir): continue for pattern in cuda_libs_patterns: for lib_file in glob.glob(os.path.join(lib_dir, pattern)): if os.path.isfile(lib_file) and not os.path.islink(lib_file): # Add to binaries list with destination directory dest_dir = '.' if 'stubs' in lib_file: dest_dir = 'stubs' cuda_binaries.append((lib_file, dest_dir)) print(f" Including CUDA library: {os.path.basename(lib_file)}") return cuda_binaries def _tree(root, prefix=''): result = [] root = os.path.normpath(root) for dirpath, _dirnames, filenames in os.walk(root): rel = os.path.relpath(dirpath, root) dest = prefix if rel == '.' else os.path.join(prefix, rel) for filename in filenames: result.append((os.path.join(dirpath, filename), dest)) return result def get_rocm_sdk_datas(): """Collect ROCm runtime + libraries wheels if installed (Windows only). We bundle these into the app so end users don't need a separate ROCm install. This follows Strategy A from AMD_ROCM_ACTION_PLAN.md: one libs family per ZIP. """ if sys.platform != 'win32': return [] # Build a list of site-packages roots to probe. roots: list[str] = [] conda_prefix = os.environ.get('CONDA_PREFIX', sys.prefix) roots.append(os.path.join(conda_prefix, 'Lib', 'site-packages')) try: import site roots.extend(site.getsitepackages()) except Exception: pass try: paths = sysconfig.get_paths() roots.extend([paths.get('purelib', ''), paths.get('platlib', '')]) except Exception: pass # De-duplicate while preserving order. seen = set() uniq_roots: list[Path] = [] for r in roots: if not r: continue if r in seen: continue seen.add(r) p = Path(r) if p.exists(): uniq_roots.append(p) core_dir: Path | None = None libs_dir: Path | None = None for sp in uniq_roots: if core_dir is None: candidate = sp / '_rocm_sdk_core' if candidate.exists(): core_dir = candidate if libs_dir is None: # Prefer a pre-renamed/custom directory if present. candidate_custom = sp / '_rocm_sdk_libraries_custom' if candidate_custom.exists(): libs_dir = candidate_custom else: # Otherwise pick the first installed family wheel. matches = sorted(sp.glob('_rocm_sdk_libraries_*')) for m in matches: if m.name == '_rocm_sdk_libraries_custom': continue libs_dir = m break rocm_datas = [] if core_dir is not None: core_bin = core_dir / 'bin' if core_bin.exists(): print(f"Including ROCm core bin from: {core_bin}") rocm_datas += _tree(str(core_bin), prefix=os.path.join('_rocm_sdk_core', 'bin')) else: print(f"Including ROCm core dir from: {core_dir}") rocm_datas += _tree(str(core_dir), prefix='_rocm_sdk_core') else: print("ROCm core dir not found; skipping ROCm runtime bundling") if libs_dir is not None: # Always map into the directory name ctranslate2 expects on Windows. print(f"Including ROCm libraries from: {libs_dir}") rocm_datas += _tree(str(libs_dir), prefix='_rocm_sdk_libraries_custom') else: print("ROCm libraries dir not found; skipping ROCm libraries bundling") return rocm_datas # Collect CUDA/cuDNN libraries cuda_binaries = get_conda_cuda_libs() binaries += cuda_binaries # Collect ROCm runtime + libraries if present (AMD HIP builds) datas += get_rocm_sdk_datas() # Collect CTranslate2 (the actual inference engine for faster-whisper) try: ctranslate2_datas, ctranslate2_binaries, ctranslate2_hiddenimports = collect_all('ctranslate2') datas += ctranslate2_datas binaries += ctranslate2_binaries hiddenimports += ctranslate2_hiddenimports except Exception as e: # Don't hard-fail builds if CTranslate2 can't be imported on the builder # (e.g., missing GPU driver). The module itself is still a dependency of # infer.py and should be pulled in by Analysis; this only affects extras. print(f"Warning: could not collect ctranslate2 extras: {e}") # Collect faster-whisper faster_whisper_datas, faster_whisper_binaries, faster_whisper_hiddenimports = collect_all('faster_whisper') datas += faster_whisper_datas binaries += faster_whisper_binaries hiddenimports += faster_whisper_hiddenimports # Collect transformers (needed for tokenizers) transformers_datas, transformers_binaries, transformers_hiddenimports = collect_all('transformers') datas += transformers_datas binaries += transformers_binaries hiddenimports += transformers_hiddenimports # Collect onnxruntime for VAD model # Note: The Python module is always 'onnxruntime' regardless of whether # you installed onnxruntime-gpu or onnxruntime via pip onnx_collected = False onnx_package = 'onnxruntime' # Module name is always 'onnxruntime' try: onnx_datas, onnx_binaries, onnx_hiddenimports = collect_all(onnx_package) datas += onnx_datas binaries += onnx_binaries hiddenimports += onnx_hiddenimports print(f"Collected {onnx_package} successfully") onnx_collected = True # Explicitly add ONNX Runtime capi libraries if not already included try: import importlib.util spec = importlib.util.find_spec(onnx_package) if spec and spec.origin: onnx_path = os.path.dirname(spec.origin) capi_path = os.path.join(onnx_path, 'capi') if os.path.exists(capi_path): print(f" Found ONNX Runtime capi directory: {capi_path}") for file in os.listdir(capi_path): if file.endswith(('.so', '.dll', '.pyd', '.dylib')): src = os.path.join(capi_path, file) # Add to root directory of the bundle binaries.append((src, '.')) print(f" Added capi library: {file}") except Exception as e: print(f" Warning: Could not collect capi libraries: {e}") except Exception as e: print(f"Could not collect {onnx_package}: {e}") onnx_collected = False if not onnx_collected: print("WARNING: Could not collect any ONNX Runtime package") # Collect librosa for audio processing librosa_datas, librosa_binaries, librosa_hiddenimports = collect_all('librosa') datas += librosa_datas binaries += librosa_binaries hiddenimports += librosa_hiddenimports # Add numpy numpy_datas, numpy_binaries, numpy_hiddenimports = collect_all('numpy') datas += numpy_datas binaries += numpy_binaries hiddenimports += numpy_hiddenimports # Add other necessary packages for package in ['pyjson5', 'scipy', 'soundfile', 'audioread', 'resampy', 'numba', 'av', 'tokenizers']: try: pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package) datas += pkg_datas binaries += pkg_binaries hiddenimports += pkg_hiddenimports except: pass # Collect setuptools and pkg_resources data to fix missing modules try: from PyInstaller.utils.hooks import collect_data_files setuptools_datas = collect_data_files('setuptools') datas += setuptools_datas pkg_resources_datas = collect_data_files('pkg_resources') datas += pkg_resources_datas except: pass # Explicitly collect backports module to fix ModuleNotFoundError try: backports_datas, backports_binaries, backports_hiddenimports = collect_all('backports') datas += backports_datas binaries += backports_binaries hiddenimports += backports_hiddenimports print("Collected backports module successfully") except Exception as e: print(f"Could not collect backports module: {e}") # Try alternative collection method try: import backports import os backports_path = os.path.dirname(backports.__file__) datas.append((backports_path, 'backports')) print(f"Added backports from path: {backports_path}") except: print("Warning: backports module not found - may need to be installed") # Add hidden imports for modules that might not be detected automatically hiddenimports += [ 'ctranslate2', 'transformers.models', 'transformers.models.whisper', 'transformers.tokenization_utils', 'transformers.tokenization_utils_base', 'tokenizers', 'tokenizers.implementations', 'tokenizers.models', 'tokenizers.pre_tokenizers', 'tokenizers.processors', 'onnxruntime.capi', 'onnxruntime.capi._pybind_state', 'librosa.core', 'librosa.feature', 'scipy.special._ufuncs_cxx', 'scipy.linalg._fblas', 'scipy.linalg._flapack', 'scipy.linalg._cythonized_array_utils', 'scipy.linalg._solve_toeplitz', 'scipy.linalg._matfuncs_sqrtm_triu', 'scipy.linalg._decomp_lu_cython', 'scipy.linalg._matfuncs_expm', 'scipy.linalg.cython_blas', 'scipy.linalg.cython_lapack', 'numba.core', 'numba.cuda', 'av.audio', 'av.container', 'av.stream', 'pkg_resources.extern', 'pkg_resources._vendor', 'packaging', 'packaging.version', 'packaging.specifiers', 'packaging.requirements', 'backports', # Fix for ModuleNotFoundError 'backports.functools_lru_cache', # Common backports module 'setuptools._vendor.jaraco', # Include jaraco modules 'setuptools._vendor.jaraco.text', 'setuptools._vendor.jaraco.context', 'setuptools._vendor.jaraco.functools', 'code', # For interactive console with --console option 'readline', # For better console experience (if available) 'rlcompleter', # For tab completion in console ] # Add ONNX Runtime provider modules only when present. def _maybe_add_hiddenimport(mod: str) -> None: try: if importlib.util.find_spec(mod) is not None: hiddenimports.append(mod) except Exception: pass _maybe_add_hiddenimport('onnxruntime.capi.onnxruntime_providers_cuda') _maybe_add_hiddenimport('onnxruntime.capi.onnxruntime_providers_tensorrt') _maybe_add_hiddenimport('onnxruntime.capi.onnxruntime_providers_shared') # Add project data files # Note: models directory is excluded and handled separately by CI datas += [ ('src/faster_whisper_transwithai_chickenrice', 'faster_whisper_transwithai_chickenrice'), ('locales', 'locales'), # Include the locales directory with translations ] a = Analysis( ['infer.py'], pathex=[], binaries=binaries, datas=datas, hiddenimports=hiddenimports, hookspath=[], # PyInstaller hooks contrib should be auto-detected hooksconfig={}, runtime_hooks=['runtime_hook.py'], # Add runtime hook to set KMP_DUPLICATE_LIB_OK excludes=[ 'matplotlib', 'tkinter', 'PyQt5', 'PyQt6', 'PySide2', 'PySide6', 'notebook', 'jupyter', 'IPython', 'pytest', ], win_no_prefer_redirects=False, win_private_assemblies=False, cipher=block_cipher, noarchive=False, ) pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) exe = EXE( pyz, a.scripts, [], exclude_binaries=True, name='infer', debug=False, bootloader_ignore_signals=False, strip=False, upx=False, console=True, disable_windowed_traceback=False, argv_emulation=False, target_arch=None, codesign_identity=None, entitlements_file=None, icon='transwithai.ico' if os.path.exists('transwithai.ico') else None, ) coll = COLLECT( exe, a.binaries, a.zipfiles, a.datas, strip=False, upx=False, upx_exclude=[], name='faster_whisper_transwithai_chickenrice', )