# Copyright 2011 The Emscripten Authors. All rights reserved. # Emscripten is available under two separate licenses, the MIT license and the # University of Illinois/NCSA Open Source License. Both these licenses can be # found in the LICENSE file. """Shared code specific to emscripten. General purpose and low-level helpers belong instead in utils.py. """ import atexit import logging import os import re import shlex import signal import subprocess import sys import tempfile from subprocess import PIPE from .toolchain_profiler import ToolchainProfiler assert sys.version_info >= (3, 10), f'emscripten requires python 3.10 or above ({sys.executable} {sys.version})' from . import colored_logger # Configure logging before importing any other local modules so even # log message during import are shown as expected. DEBUG = int(os.environ.get('EMCC_DEBUG', '0')) EMCC_LOGGING = int(os.environ.get('EMCC_LOGGING', '1')) log_level = logging.ERROR if DEBUG: log_level = logging.DEBUG elif EMCC_LOGGING: log_level = logging.INFO # can add %(asctime)s to see timestamps logging.basicConfig(format='%(name)s:%(levelname)s: %(message)s', level=log_level) colored_logger.enable() import contextlib from . import cache, config, diagnostics, filelock, tempfiles, utils from .settings import settings from .utils import exe_path_from_root, exit_with_error, memoize, path_from_root, safe_ensure_dirs DEBUG_SAVE = DEBUG or int(os.environ.get('EMCC_DEBUG_SAVE', '0')) PRINT_SUBPROCS = int(os.getenv('EMCC_VERBOSE', '0')) SKIP_SUBPROCS = False # Minimum node version required to run the emscripten compiler. This is # distinct from the minimum version required to execute the generated code # (settings.MIN_NODE_VERSION). # This is currently set to v18 since this is the version of node available # in debian/stable (bookworm). We need at least v18.3.0 because we make # use of util.parseArg which was added in v18.3.0. 
MINIMUM_NODE_VERSION = (18, 3, 0)
EXPECTED_LLVM_VERSION = 23

# These get set by setup_temp_dirs
TEMP_DIR = None
EMSCRIPTEN_TEMP_DIR = None

logger = logging.getLogger('shared')

# warning about absolute-paths is disabled by default, and not enabled by -Wall
diagnostics.add_warning('absolute-paths', enabled=False, part_of_all=False)
# unused diagnostic flags. TODO(sbc): remove at some point
diagnostics.add_warning('almost-asm')
diagnostics.add_warning('experimental')
# Don't show legacy settings warnings by default
# See https://github.com/emscripten-core/emscripten/pull/10615 for the rationale
# behind not showing this warning by default.
diagnostics.add_warning('legacy-settings', enabled=False, part_of_all=False)
# Catch-all for other emcc warnings
diagnostics.add_warning('linkflags')
diagnostics.add_warning('emcc')
diagnostics.add_warning('undefined', error=True)
diagnostics.add_warning('deprecated', shared=True)
diagnostics.add_warning('version-check')
diagnostics.add_warning('export-main')
diagnostics.add_warning('map-unrecognized-libraries')
diagnostics.add_warning('unused-command-line-argument', shared=True)
diagnostics.add_warning('pthreads-mem-growth')
diagnostics.add_warning('transpile')
diagnostics.add_warning('limited-postlink-optimizations')
diagnostics.add_warning('em-js-i64')
diagnostics.add_warning('js-compiler')
diagnostics.add_warning('compatibility')
diagnostics.add_warning('unsupported')
diagnostics.add_warning('unused-main')
# Closure warnings are not (yet) enabled by default
diagnostics.add_warning('closure', enabled=False)


def returncode_to_str(code):
  """Describe a non-zero subprocess return code for use in error messages.

  Negative codes follow the `subprocess` convention of "killed by signal
  -code" and are rendered with the signal's symbolic name when one exists.
  """
  assert code != 0
  if code < 0:
    try:
      signal_name = signal.Signals(-code).name
    except ValueError:
      # Not every signal number has an entry in the `signal.Signals` enum
      # (e.g. real-time signals); fall back to the raw number rather than
      # crashing while trying to report a failure.
      signal_name = f'signal {-code}'
    return f'received {signal_name} ({code})'
  return f'returned {code}'


def run_multiple_processes(commands,
                           env=None,
                           route_stdout_to_temp_files_suffix=None,
                           cwd=None):
  """Run multiple subprocess commands, several at a time.

  At most `utils.get_num_cores()` commands run concurrently. The first
  command that finishes with a non-zero return code is reported through
  `exit_with_error`.

  Args:
    commands: list of commands, each a list of strings.
    env: environment for the children; defaults to a copy of `os.environ`.
    route_stdout_to_temp_files_suffix: if set, each child's stdout is written
      to a temp file with this suffix instead of being inherited.
    cwd: working directory for the children.

  Returns:
    When `route_stdout_to_temp_files_suffix` is set, the list of stdout file
    names in the same order as `commands`; otherwise None.
  """
  if env is None:
    env = os.environ.copy()

  std_outs = []

  # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt
  # when multiple child processes have been spawned.
  # import signal
  # def signal_handler(sig, frame):
  #   sys.exit(1)
  # signal.signal(signal.SIGINT, signal_handler)

  # Map containing all currently running processes.
  # command index -> proc/Popen object
  processes = {}

  def get_finished_process():
    """Block until some running process has exited and return its index."""
    while True:
      for idx, proc in processes.items():
        if proc.poll() is not None:
          return idx
      # All processes still running; wait a short while for the first
      # (oldest) process to finish, then look again if any process has completed.
      idx, proc = next(iter(processes.items()))
      try:
        proc.communicate(timeout=0.2)
        return idx
      except subprocess.TimeoutExpired:
        pass

  num_parallel_processes = utils.get_num_cores()
  temp_files = get_temp_files()
  i = 0
  num_completed = 0
  while num_completed < len(commands):
    if i < len(commands) and len(processes) < num_parallel_processes:
      # Not enough parallel processes running, spawn a new one.
      if route_stdout_to_temp_files_suffix:
        stdout = temp_files.get(route_stdout_to_temp_files_suffix)
      else:
        stdout = None
      if DEBUG:
        logger.debug('Running subprocess %d/%d: %s', i + 1, len(commands), ' '.join(commands[i]))
      print_compiler_stage(commands[i])
      proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env, cwd=cwd)
      processes[i] = proc
      if route_stdout_to_temp_files_suffix:
        std_outs.append((i, stdout.name))
      i += 1
    else:
      # Not spawning a new process (Too many commands running in parallel, or
      # no commands left): find if a process has finished.
      idx = get_finished_process()
      finished_process = processes.pop(idx)
      if finished_process.returncode != 0:
        # Pass the values as format arguments (as check_call does) so that a
        # literal '%' in the command line is never treated as a format spec.
        exit_with_error('subprocess %d/%d failed (%s)! (cmdline: %s)',
                        idx + 1, len(commands),
                        returncode_to_str(finished_process.returncode),
                        shlex.join(commands[idx]))
      num_completed += 1

  if route_stdout_to_temp_files_suffix:
    # If processes finished out of order, sort the results to the order of the input.
    std_outs.sort(key=lambda x: x[0])
    return [x[1] for x in std_outs]


def check_call(cmd, *args, **kw):
  """Like `utils.run_process` but treat failures as fatal and exit_with_error.

  The command is echoed through `print_compiler_stage` first. When
  `SKIP_SUBPROCS` is set nothing is executed and 0 is returned instead of a
  completed-process object.
  """
  print_compiler_stage(cmd)
  if SKIP_SUBPROCS:
    return 0
  try:
    return utils.run_process(cmd, *args, **kw)
  except subprocess.CalledProcessError as e:
    exit_with_error("'%s' failed (%s)", shlex.join(cmd), returncode_to_str(e.returncode))
  except OSError as e:
    exit_with_error("'%s' failed: %s", shlex.join(cmd), e)


def exec_process(cmd):
  """Echo `cmd` via print_compiler_stage, then hand it to `utils.exec`."""
  print_compiler_stage(cmd)
  utils.exec(cmd)


def run_js_tool(filename, jsargs=[], node_args=[], **kw):  # noqa: B006
  """Execute a javascript tool.

  This is used by emcc to run parts of the build process that are
  implemented in javascript.

  The default lists are only read, never mutated. Returns the `stdout`
  attribute of the result of `check_call`.
  """
  command = config.NODE_JS + node_args + [filename] + jsargs
  return check_call(command, **kw).stdout


def get_npm_cmd(name, missing_ok=False):
  """Return the command (as a list) used to run the npm-installed tool `name`.

  On Windows this is the `.cmd` shim; elsewhere the script is run through the
  configured node. If the tool is not present under `node_modules/.bin`,
  returns None when `missing_ok` is set and reports a fatal error otherwise.
  """
  if utils.WINDOWS:
    cmd = [path_from_root('node_modules/.bin', name + '.cmd')]
  else:
    cmd = config.NODE_JS + [path_from_root('node_modules/.bin', name)]
  if not os.path.exists(cmd[-1]):
    if missing_ok:
      return None
    else:
      exit_with_error(f'{name} was not found! Please run "npm install" in Emscripten root directory to set up npm dependencies')
  return cmd


@memoize
def get_clang_version():
  """Return the `major.minor` version string printed by `clang --version`.

  Returns None when the output contains no recognizable version.
  """
  if not os.path.exists(CLANG_CC):
    exit_with_error('clang executable not found at `%s`' % CLANG_CC)
  proc = check_call([CLANG_CC, '--version'], stdout=PIPE)
  m = re.search(r'[Vv]ersion\s+(\d+\.\d+)', proc.stdout)
  return m and m.group(1)


def check_llvm_version():
  """Check that clang reports the expected LLVM major version.

  Returns True on a match; otherwise emits a `version-check` warning and
  returns False.
  """
  actual = get_clang_version()
  # get_clang_version returns None when the version could not be parsed; in
  # that case fall through to the warning instead of failing on `.startswith`.
  if actual:
    if actual.startswith('%d.' % EXPECTED_LLVM_VERSION):
      return True
    # When running in CI environment we also silently allow the next major
    # version of LLVM here so that new versions of LLVM can be rolled in
    # without disruption.
    if 'BUILDBOT_BUILDNUMBER' in os.environ:
      if actual.startswith('%d.' % (EXPECTED_LLVM_VERSION + 1)):
        return True
  diagnostics.warning('version-check', 'LLVM version for clang executable "%s" appears incorrect (seeing "%s", expected "%s")', CLANG_CC, actual or '<unknown>', EXPECTED_LLVM_VERSION)
  return False


def get_clang_targets():
  """Return the text following `Registered Targets:` in `clang -print-targets`.

  Any failure to find, run or parse clang is reported as a fatal error.
  """
  if not os.path.exists(CLANG_CC):
    exit_with_error('clang executable not found at `%s`' % CLANG_CC)
  try:
    target_info = utils.run_process([CLANG_CC, '-print-targets'], stdout=PIPE).stdout
  except subprocess.CalledProcessError:
    exit_with_error('error running `clang -print-targets`. Check your llvm installation (%s)' % CLANG_CC)
  if 'Registered Targets:' not in target_info:
    exit_with_error('error parsing output of `clang -print-targets`. Check your llvm installation (%s)' % CLANG_CC)
  return target_info.split('Registered Targets:')[1]


def check_llvm():
  """Return True if clang lists a `wasm32` target.

  Otherwise logs a critical message, prints the reported targets to stderr
  and returns False.
  """
  targets = get_clang_targets()
  if 'wasm32' not in targets:
    logger.critical('LLVM has not been built with the WebAssembly backend, clang reports:')
    print('===========================================================================', file=sys.stderr)
    print(targets, file=sys.stderr)
    print('===========================================================================', file=sys.stderr)
    return False

  return True


def get_node_directory():
  """Return the directory containing the configured node executable."""
  # config.NODE_JS may be a command list (executable first) or a plain path.
  node = config.NODE_JS[0] if isinstance(config.NODE_JS, list) else config.NODE_JS
  return os.path.dirname(node)


# When we run some tools from npm (closure, html-minifier-terser), those
# expect that the tools have node.js accessible in PATH. Place our node
# there when invoking those tools.
def env_with_node_in_path():
  """Return a copy of the environment with our node directory first in PATH."""
  env = os.environ.copy()
  node_dir = get_node_directory()
  # PATH may be unset (or empty) in stripped-down environments. Avoid both a
  # KeyError and a dangling empty PATH entry in that case.
  existing = env.get('PATH')
  env['PATH'] = node_dir + os.pathsep + existing if existing else node_dir
  return env


def _get_node_version_pair(nodejs):
  """Run `nodejs --version` and return `(raw_output, version_tuple)`.

  The tuple is built from the dotted numeric part of the output, after
  dropping a leading `v` and anything from the first `-` onwards.
  """
  actual = utils.run_process(nodejs + ['--version'], stdout=PIPE).stdout.strip()
  numeric = actual.removeprefix('v').split('-')[0]
  version = tuple(int(v) for v in numeric.split('.'))
  return actual, version


def get_node_version(nodejs):
  """Return the version tuple of `nodejs`, or None if `nodejs` is empty."""
  if not nodejs:
    return None
  return _get_node_version_pair(nodejs)[1]


@memoize
def check_node_version():
  """Warn if the configured node is older than MINIMUM_NODE_VERSION.

  Returns the detected version tuple, or None (after a `version-check`
  warning) when the version could not be determined.
  """
  try:
    actual, version = _get_node_version_pair(config.NODE_JS)
  except Exception as e:
    diagnostics.warning('version-check', 'cannot check node version: %s', e)
    return

  # Skip the version check if we are running `bun` instead of node.
  if version < MINIMUM_NODE_VERSION and 'bun' not in os.path.basename(config.NODE_JS[0]):
    expected = '.'.join(str(v) for v in MINIMUM_NODE_VERSION)
    diagnostics.warning('version-check', f'node version appears too old (seeing "{actual}", expected "v{expected}")')

  return version


def node_reference_types_flags(nodejs):
  """Return the node flags needed to enable wasm reference types."""
  node_version = get_node_version(nodejs)
  # reference types were enabled by default in node v18.
  if node_version and node_version < (18, 0, 0):
    return ['--experimental-wasm-reftypes']
  else:
    return []


def node_exception_flags(nodejs):
  """Return the node flags needed to enable wasm exception handling."""
  node_version = get_node_version(nodejs)
  # Legacy exception handling was enabled by default in node v17.
  if node_version and node_version < (17, 0, 0):
    return ['--experimental-wasm-eh']
  # Standard exception handling was supported behind flag in node v22.
  if node_version and node_version >= (22, 0, 0) and not settings.WASM_LEGACY_EXCEPTIONS:
    return ['--experimental-wasm-exnref']
  return []


@memoize
@ToolchainProfiler.profile()
def check_node():
  """Verify that the configured node can run a trivial script; fatal if not."""
  try:
    utils.run_process(config.NODE_JS + ['-e', 'console.log("hello")'], stdout=PIPE)
  except Exception as e:
    exit_with_error('the configured node executable (%s) does not seem to work, check the paths in %s (%s)', config.NODE_JS, config.EM_CONFIG, e)


def generate_sanity():
  """Return the expected contents of the sanity file (version and LLVM root)."""
  return f'{utils.EMSCRIPTEN_VERSION}|{config.LLVM_ROOT}\n'


@memoize
def perform_sanity_checks(quiet=False):
  """Run the sanity checks themselves.

  The node and LLVM version checks and the wasm-backend check always run.
  The remaining checks are skipped when EM_IGNORE_SANITY is set.
  """
  # some warning, mostly not fatal checks - do them even if EM_IGNORE_SANITY is on
  check_node_version()
  check_llvm_version()

  llvm_ok = check_llvm()

  if os.environ.get('EM_IGNORE_SANITY'):
    logger.info('EM_IGNORE_SANITY set, ignoring sanity checks')
    return

  if not quiet:
    logger.info('(Emscripten: Running sanity checks)')

  if not llvm_ok:
    exit_with_error('failing sanity checks due to previous llvm failure')

  check_node()

  with ToolchainProfiler.profile_block('sanity LLVM'):
    for cmd in (CLANG_CC, LLVM_AR):
      if not os.path.exists(cmd) and not os.path.exists(cmd + '.exe'):  # .exe extension required for Windows
        exit_with_error('cannot find %s, check the paths in %s', cmd, config.EM_CONFIG)


@ToolchainProfiler.profile()
def check_sanity(force=False, quiet=False):
  """Check that basic stuff we need (Node.js, and Clang and LLVM) exists.

  The test runner always does this check (through |force|). emcc does this less
  frequently, only when ${EM_CONFIG}_sanity does not exist or is older than
  EM_CONFIG (so, we re-check sanity when the settings are changed). We also
  re-check sanity and clear the cache when the version changes.
  """
  if not force and os.environ.get('EMCC_SKIP_SANITY_CHECK') == '1':
    return

  # We set EMCC_SKIP_SANITY_CHECK so that any subprocesses that we launch will
  # not re-run the tests.
  os.environ['EMCC_SKIP_SANITY_CHECK'] = '1'

  # In DEBUG mode we perform the sanity checks even when
  # early return due to the file being up-to-date.
  if DEBUG:
    force = True

  if config.FROZEN_CACHE:
    if force:
      perform_sanity_checks(quiet)
    return

  if os.environ.get('EM_IGNORE_SANITY'):
    perform_sanity_checks(quiet)
    return

  expected = generate_sanity()

  sanity_file = cache.get_path('sanity.txt')

  def sanity_is_correct():
    sanity_data = None
    # We can't simply check for the existence of sanity_file and then read from
    # it here because we don't hold the cache lock yet and some other process
    # could clear the cache between checking for, and reading from, the file.
    with contextlib.suppress(Exception):
      sanity_data = utils.read_file(sanity_file)
    if sanity_data == expected:
      logger.debug(f'sanity file up-to-date: {sanity_file}')
      # Even if the sanity file is up-to-date we still run the checks
      # when force is set.
      if force:
        perform_sanity_checks(quiet)
      return True  # all is well
    return False

  if sanity_is_correct():
    # Early return without taking the cache lock
    return

  with cache.lock('sanity'):
    # Check again once the cache lock is acquired
    if sanity_is_correct():
      return

    if os.path.exists(sanity_file):
      sanity_data = utils.read_file(sanity_file)
      logger.info('old sanity: %s', sanity_data.strip())
      logger.info('new sanity: %s', expected.strip())
      logger.info('(Emscripten: config changed, clearing cache)')
      cache.erase()
    else:
      logger.debug(f'sanity file not found: {sanity_file}')

    # NOTE(review): `quiet` is not forwarded on this path - confirm whether
    # that is intentional.
    perform_sanity_checks()

    # Only create/update this file if the sanity check succeeded, i.e., we got here
    utils.write_file(sanity_file, expected)


def llvm_tool_path_with_suffix(tool, suffix):
  """Locate `tool` (optionally named `tool-suffix`) under config.LLVM_ROOT."""
  if suffix:
    tool += '-' + suffix
  llvm_root = os.path.expanduser(config.LLVM_ROOT)
  return utils.find_exe(llvm_root, tool)


# Some distributions ship with multiple llvm versions so they add
# the version to the binaries, cope with that
def llvm_tool_path(tool):
  return llvm_tool_path_with_suffix(tool, config.LLVM_ADD_VERSION)


# Some distributions ship with multiple clang versions so they add
# the version to the binaries, cope with that
def clang_tool_path(tool):
  return llvm_tool_path_with_suffix(tool, config.CLANG_ADD_VERSION)


# In MINIMAL_RUNTIME mode, keep suffixes of generated files simple
# ('.mem' instead of '.js.mem'; '.symbols' instead of '.js.symbols' etc)
# Retain the original naming scheme in traditional runtime.
def replace_or_append_suffix(filename, new_suffix):
  assert new_suffix[0] == '.'
  return utils.replace_suffix(filename, new_suffix) if settings.MINIMAL_RUNTIME else filename + new_suffix


# Temp dir. Create a random one, unless EMCC_DEBUG is set, in which case use the canonical
# temp directory (TEMP_DIR/emscripten_temp).
@memoize
def get_emscripten_temp_dir():
  """Return path of EMSCRIPTEN_TEMP_DIR, creating one if it didn't exist."""
  global EMSCRIPTEN_TEMP_DIR
  if not EMSCRIPTEN_TEMP_DIR:
    EMSCRIPTEN_TEMP_DIR = tempfile.mkdtemp(prefix='emscripten_temp_', dir=TEMP_DIR)

    if not DEBUG_SAVE:
      # The global might change later, so bind the directory created here as
      # the argument of the cleanup callback now.
      atexit.register(utils.delete_dir, EMSCRIPTEN_TEMP_DIR)
  return EMSCRIPTEN_TEMP_DIR


def in_temp(name):
  """Return the path of `name`'s basename inside the emscripten temp dir."""
  return os.path.join(get_emscripten_temp_dir(), os.path.basename(name))


def get_canonical_temp_dir(temp_dir):
  """Return the fixed `emscripten_temp` directory path under `temp_dir`."""
  return os.path.join(temp_dir, 'emscripten_temp')


def setup_temp_dirs():
  """Initialize TEMP_DIR, CANONICAL_TEMP_DIR and EMSCRIPTEN_TEMP_DIR.

  TEMP_DIR comes from EMCC_TEMP_DIR or the system default and must already
  exist. In DEBUG mode the canonical temp dir is created and used, guarded
  by a file lock held until exit.
  """
  global EMSCRIPTEN_TEMP_DIR, CANONICAL_TEMP_DIR, TEMP_DIR
  EMSCRIPTEN_TEMP_DIR = None
  TEMP_DIR = os.environ.get("EMCC_TEMP_DIR", tempfile.gettempdir())
  if not os.path.isdir(TEMP_DIR):
    exit_with_error(f'The temporary directory `{TEMP_DIR}` does not exist! Please make sure that the path is correct.')

  CANONICAL_TEMP_DIR = get_canonical_temp_dir(TEMP_DIR)

  if DEBUG:
    EMSCRIPTEN_TEMP_DIR = CANONICAL_TEMP_DIR
    try:
      safe_ensure_dirs(EMSCRIPTEN_TEMP_DIR)
    except Exception as e:
      exit_with_error('error creating canonical temp dir (Check definition of TEMP_DIR in %s): %s', config.EM_CONFIG, e)

    # Since the canonical temp directory is, by definition, the same
    # between all processes that run in DEBUG mode we need to use a multi
    # process lock to prevent more than one process from writing to it.
    # This is because emcc assumes that it can use non-unique names inside
    # the temp directory.
    # Sadly we need to allow child processes to access this directory
    # though, since emcc can recursively call itself when building
    # libraries and ports.
    if 'EM_HAVE_TEMP_DIR_LOCK' not in os.environ:
      filelock_name = os.path.join(EMSCRIPTEN_TEMP_DIR, 'emscripten.lock')
      lock = filelock.FileLock(filelock_name)
      os.environ['EM_HAVE_TEMP_DIR_LOCK'] = '1'
      lock.acquire()
      atexit.register(lock.release)


@memoize
def get_temp_files():
  """Return the `tempfiles.TempFiles` manager for this process."""
  if DEBUG_SAVE:
    # In debug mode store all temp files in the emscripten-specific temp dir
    # and don't worry about cleaning them up.
    return tempfiles.TempFiles(get_emscripten_temp_dir(), save_debug_files=True)
  else:
    # Otherwise use the system tempdir and try to clean up after ourselves.
    return tempfiles.TempFiles(TEMP_DIR, save_debug_files=False)


def print_compiler_stage(cmd):
  """Emulate the '-v/-###' flags of clang/gcc by printing the sub-commands that we run."""

  def maybe_quote(arg):
    # Only quote arguments containing something other than alphanumerics
    # and the characters `./-_`.
    if all(c.isalnum() or c in './-_' for c in arg):
      return arg
    else:
      return f'"{arg}"'

  if SKIP_SUBPROCS:
    print(' ' + ' '.join([maybe_quote(a) for a in cmd]), file=sys.stderr)
    sys.stderr.flush()
  elif PRINT_SUBPROCS:
    print(' %s %s' % (maybe_quote(cmd[0]), shlex.join(cmd[1:])), file=sys.stderr)
    sys.stderr.flush()


def demangle_c_symbol_name(name):
  """Strip the leading `_` from a C symbol; prefix any other name with `$`."""
  if not is_c_symbol(name):
    return '$' + name
  return name[1:] if name.startswith('_') else name


def is_c_symbol(name):
  """Return True if `name` starts with an underscore."""
  return name.startswith('_')


def is_internal_global(name):
  """Return True for the internal `__start_*`/`__stop_*`/`__em_*` globals."""
  internal_start_stop_symbols = {'__start_em_asm', '__stop_em_asm',
                                 '__start_em_js', '__stop_em_js',
                                 '__start_em_lib_deps', '__stop_em_lib_deps',
                                 '__em_lib_deps'}
  internal_prefixes = ('__em_js__', '__em_lib_deps')
  # str.startswith accepts a tuple of prefixes.
  return name in internal_start_stop_symbols or name.startswith(internal_prefixes)


def is_user_export(name):
  """Return True unless `name` is an internal global or a known runtime export."""
  if is_internal_global(name):
    return False

  return name not in {'__asyncify_data', '__asyncify_state', '__indirect_function_table', 'memory'} and not name.startswith(('dynCall_', 'orig$'))


def asmjs_mangle(name):
  """Mangle a name the way asm.js/JSBackend globals are mangled.

  Prepends '_' to user exports; other names are returned unchanged.
  Used by wasm backend for JS library consistency with asm.js.
  """
  # We also use this function to convert the clang-mangled `__main_argc_argv`
  # to simply `main` which is expected by the emscripten JS glue code.
  if name == '__main_argc_argv':
    name = 'main'
  if is_user_export(name):
    return '_' + name
  return name


def do_replace(input_, pattern, replacement):
  """Replace `pattern` in `input_`; fatal error if `pattern` is absent."""
  if pattern not in input_:
    exit_with_error('expected to find pattern in input JS: %s' % pattern)
  return input_.replace(pattern, replacement)


def get_llvm_target():
  """Return the LLVM target triple for the current MEMORY64 setting."""
  if settings.MEMORY64:
    return 'wasm64-unknown-emscripten'
  else:
    return 'wasm32-unknown-emscripten'


def init():
  """Module initialization: set the version globals and the temp dirs."""
  utils.set_version_globals()
  setup_temp_dirs()


# ============================================================================
# End declarations.
# ============================================================================

# Everything below this point is top level code that gets run when importing this
# file. TODO(sbc): We should try to reduce the amount we do here and instead
# have consumers explicitly call initialization functions.

CLANG_CC = clang_tool_path('clang')
CLANG_CXX = clang_tool_path('clang++')
CLANG_SCAN_DEPS = llvm_tool_path('clang-scan-deps')
LLVM_AR = llvm_tool_path('llvm-ar')
LLVM_DWP = llvm_tool_path('llvm-dwp')
LLVM_RANLIB = llvm_tool_path('llvm-ranlib')
LLVM_NM = llvm_tool_path('llvm-nm')
LLVM_DWARFDUMP = llvm_tool_path('llvm-dwarfdump')
LLVM_OBJCOPY = llvm_tool_path('llvm-objcopy')
WASM_LD = llvm_tool_path('wasm-ld')
LLVM_PROFDATA = llvm_tool_path('llvm-profdata')
LLVM_COV = llvm_tool_path('llvm-cov')

EMCC = exe_path_from_root('emcc')
EMXX = exe_path_from_root('em++')
EMAR = exe_path_from_root('emar')
EMRANLIB = exe_path_from_root('emranlib')
FILE_PACKAGER = exe_path_from_root('tools/file_packager')
# Windows .dll suffix is not included in this list, since those are never
# linked to directly on the command line.
DYLIB_EXTENSIONS = ['.dylib', '.so']

run_via_emxx = False

init()