# Copyright 2011 The Emscripten Authors.  All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License.  Both these licenses can be
# found in the LICENSE file.

"""Shared code specific to emscripten.

General purpose and low-level helpers belong instead in utils.py.
"""

import atexit
import logging
import os
import re
import shlex
import signal
import subprocess
import sys
import tempfile
from subprocess import PIPE

from .toolchain_profiler import ToolchainProfiler

assert sys.version_info >= (3, 10), f'emscripten requires python 3.10 or above ({sys.executable} {sys.version})'

from . import colored_logger

# Configure logging before importing any other local modules so even
# log message during import are shown as expected.
DEBUG = int(os.environ.get('EMCC_DEBUG', '0'))
EMCC_LOGGING = int(os.environ.get('EMCC_LOGGING', '1'))
log_level = logging.ERROR
if DEBUG:
  log_level = logging.DEBUG
elif EMCC_LOGGING:
  log_level = logging.INFO
# can add  %(asctime)s  to see timestamps
logging.basicConfig(format='%(name)s:%(levelname)s: %(message)s', level=log_level)
colored_logger.enable()

import contextlib

from . import cache, config, diagnostics, filelock, tempfiles, utils
from .settings import settings
from .utils import exe_path_from_root, exit_with_error, memoize, path_from_root, safe_ensure_dirs

DEBUG_SAVE = DEBUG or int(os.environ.get('EMCC_DEBUG_SAVE', '0'))
PRINT_SUBPROCS = int(os.getenv('EMCC_VERBOSE', '0'))
SKIP_SUBPROCS = False

# Minimum node version required to run the emscripten compiler.  This is
# distinct from the minimum version required to execute the generated code
# (settings.MIN_NODE_VERSION).
# This is currently set to v18 since this is the version of node available
# in debian/stable (bookworm).  We need at least v18.3.0 because we make
# use of util.parseArg which was added in v18.3.0.
MINIMUM_NODE_VERSION = (18, 3, 0)
EXPECTED_LLVM_VERSION = 23

# These get set by setup_temp_dirs
TEMP_DIR = None
EMSCRIPTEN_TEMP_DIR = None

logger = logging.getLogger('shared')

# warning about absolute-paths is disabled by default, and not enabled by -Wall
diagnostics.add_warning('absolute-paths', enabled=False, part_of_all=False)
# unused diagnostic flags.  TODO(sbc): remove at some point
diagnostics.add_warning('almost-asm')
diagnostics.add_warning('experimental')
# Don't show legacy settings warnings by default
# See https://github.com/emscripten-core/emscripten/pull/10615 for the rationale
# behind not showing this warning by default.
diagnostics.add_warning('legacy-settings', enabled=False, part_of_all=False)
# Catch-all for other emcc warnings
diagnostics.add_warning('linkflags')
diagnostics.add_warning('emcc')
diagnostics.add_warning('undefined', error=True)
diagnostics.add_warning('deprecated', shared=True)
diagnostics.add_warning('version-check')
diagnostics.add_warning('export-main')
diagnostics.add_warning('map-unrecognized-libraries')
diagnostics.add_warning('unused-command-line-argument', shared=True)
diagnostics.add_warning('pthreads-mem-growth')
diagnostics.add_warning('transpile')
diagnostics.add_warning('limited-postlink-optimizations')
diagnostics.add_warning('em-js-i64')
diagnostics.add_warning('js-compiler')
diagnostics.add_warning('compatibility')
diagnostics.add_warning('unsupported')
diagnostics.add_warning('unused-main')
# Closure warning are not (yet) enabled by default
diagnostics.add_warning('closure', enabled=False)


def returncode_to_str(code):
  assert code != 0
  if code < 0:
    signal_name = signal.Signals(-code).name
    return f'received {signal_name} ({code})'

  return f'returned {code}'


def run_multiple_processes(commands,
                           env=None,
                           route_stdout_to_temp_files_suffix=None,
                           cwd=None):
  """Run multiple subprocess commands.

  route_stdout_to_temp_files_suffix : string
    if not None, all stdouts are instead written to files, and an array
    of filenames is returned.
  """
  if env is None:
    env = os.environ.copy()

  std_outs = []

  # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt
  # when multiple child processes have been spawned.
  # import signal
  # def signal_handler(sig, frame):
  #   sys.exit(1)
  # signal.signal(signal.SIGINT, signal_handler)

  # Map containing all currently running processes.
  # command index -> proc/Popen object
  processes = {}

  def get_finished_process():
    while True:
      for idx, proc in processes.items():
        if proc.poll() is not None:
          return idx
      # All processes still running; wait a short while for the first
      # (oldest) process to finish, then look again if any process has completed.
      idx, proc = next(iter(processes.items()))
      try:
        proc.communicate(timeout=0.2)
        return idx
      except subprocess.TimeoutExpired:
        pass

  num_parallel_processes = utils.get_num_cores()
  temp_files = get_temp_files()
  i = 0
  num_completed = 0
  while num_completed < len(commands):
    if i < len(commands) and len(processes) < num_parallel_processes:
      # Not enough parallel processes running, spawn a new one.
      if route_stdout_to_temp_files_suffix:
        stdout = temp_files.get(route_stdout_to_temp_files_suffix)
      else:
        stdout = None
      if DEBUG:
        logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i])))
      print_compiler_stage(commands[i])
      proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env, cwd=cwd)
      processes[i] = proc
      if route_stdout_to_temp_files_suffix:
        std_outs.append((i, stdout.name))
      i += 1
    else:
      # Not spawning a new process (Too many commands running in parallel, or
      # no commands left): find if a process has finished.
      idx = get_finished_process()
      finished_process = processes.pop(idx)
      if finished_process.returncode != 0:
        exit_with_error('subprocess %d/%d failed (%s)! (cmdline: %s)' % (idx + 1, len(commands), returncode_to_str(finished_process.returncode), shlex.join(commands[idx])))
      num_completed += 1

  if route_stdout_to_temp_files_suffix:
    # If processes finished out of order, sort the results to the order of the input.
    std_outs.sort(key=lambda x: x[0])
    return [x[1] for x in std_outs]


def check_call(cmd, *args, **kw):
  """Like `run_process` above but treat failures as fatal and exit_with_error."""
  print_compiler_stage(cmd)
  if SKIP_SUBPROCS:
    return 0
  try:
    return utils.run_process(cmd, *args, **kw)
  except subprocess.CalledProcessError as e:
    exit_with_error("'%s' failed (%s)", shlex.join(cmd), returncode_to_str(e.returncode))
  except OSError as e:
    exit_with_error("'%s' failed: %s", shlex.join(cmd), e)


def exec_process(cmd):
  print_compiler_stage(cmd)
  utils.exec(cmd)


def run_js_tool(filename, jsargs=[], node_args=[], **kw):  # noqa: B006
  """Execute a javascript tool.

  This is used by emcc to run parts of the build process that are
  implemented in javascript.
  """
  command = config.NODE_JS + node_args + [filename] + jsargs
  return check_call(command, **kw).stdout


def get_npm_cmd(name, missing_ok=False):
  if utils.WINDOWS:
    cmd = [path_from_root('node_modules/.bin', name + '.cmd')]
  else:
    cmd = config.NODE_JS + [path_from_root('node_modules/.bin', name)]
  if not os.path.exists(cmd[-1]):
    if missing_ok:
      return None
    else:
      exit_with_error(f'{name} was not found! Please run "npm install" in Emscripten root directory to set up npm dependencies')
  return cmd


@memoize
def get_clang_version():
  if not os.path.exists(CLANG_CC):
    exit_with_error('clang executable not found at `%s`' % CLANG_CC)
  proc = check_call([CLANG_CC, '--version'], stdout=PIPE)
  m = re.search(r'[Vv]ersion\s+(\d+\.\d+)', proc.stdout)
  return m and m.group(1)


def check_llvm_version():
  actual = get_clang_version()
  if actual.startswith('%d.' % EXPECTED_LLVM_VERSION):
    return True
  # When running in CI environment we also silently allow the next major
  # version of LLVM here so that new versions of LLVM can be rolled in
  # without disruption.
  if 'BUILDBOT_BUILDNUMBER' in os.environ:
    if actual.startswith('%d.' % (EXPECTED_LLVM_VERSION + 1)):
      return True
  diagnostics.warning('version-check', 'LLVM version for clang executable "%s" appears incorrect (seeing "%s", expected "%s")', CLANG_CC, actual, EXPECTED_LLVM_VERSION)
  return False


def get_clang_targets():
  if not os.path.exists(CLANG_CC):
    exit_with_error('clang executable not found at `%s`' % CLANG_CC)
  try:
    target_info = utils.run_process([CLANG_CC, '-print-targets'], stdout=PIPE).stdout
  except subprocess.CalledProcessError:
    exit_with_error('error running `clang -print-targets`.  Check your llvm installation (%s)' % CLANG_CC)
  if 'Registered Targets:' not in target_info:
    exit_with_error('error parsing output of `clang -print-targets`.  Check your llvm installation (%s)' % CLANG_CC)
  return target_info.split('Registered Targets:')[1]


def check_llvm():
  targets = get_clang_targets()
  if 'wasm32' not in targets:
    logger.critical('LLVM has not been built with the WebAssembly backend, clang reports:')
    print('===========================================================================', file=sys.stderr)
    print(targets, file=sys.stderr)
    print('===========================================================================', file=sys.stderr)
    return False

  return True


def get_node_directory():
  return os.path.dirname(config.NODE_JS[0] if type(config.NODE_JS) is list else config.NODE_JS)


# When we run some tools from npm (closure, html-minifier-terser), those
# expect that the tools have node.js accessible in PATH. Place our node
# there when invoking those tools.
def env_with_node_in_path():
  env = os.environ.copy()
  env['PATH'] = get_node_directory() + os.pathsep + env['PATH']
  return env


def _get_node_version_pair(nodejs):
  actual = utils.run_process(nodejs + ['--version'], stdout=PIPE).stdout.strip()
  version = actual.removeprefix('v')
  version = version.split('-')[0].split('.')
  version = tuple(int(v) for v in version)
  return actual, version


def get_node_version(nodejs):
  if not nodejs:
    return None
  return _get_node_version_pair(nodejs)[1]


@memoize
def check_node_version():
  try:
    actual, version = _get_node_version_pair(config.NODE_JS)
  except Exception as e:
    diagnostics.warning('version-check', 'cannot check node version: %s', e)
    return

  # Skip the version check is we are running `bun` instead of node.
  if version < MINIMUM_NODE_VERSION and 'bun' not in os.path.basename(config.NODE_JS[0]):
    expected = '.'.join(str(v) for v in MINIMUM_NODE_VERSION)
    diagnostics.warning('version-check', f'node version appears too old (seeing "{actual}", expected "v{expected}")')

  return version


def node_reference_types_flags(nodejs):
  node_version = get_node_version(nodejs)
  # reference types were enabled by default in node v18.
  if node_version and node_version < (18, 0, 0):
    return ['--experimental-wasm-reftypes']
  else:
    return []


def node_exception_flags(nodejs):
  node_version = get_node_version(nodejs)
  # Legacy exception handling was enabled by default in node v17.
  if node_version and node_version < (17, 0, 0):
    return ['--experimental-wasm-eh']
  # Standard exception handling was supported behind flag in node v22.
  if node_version and node_version >= (22, 0, 0) and not settings.WASM_LEGACY_EXCEPTIONS:
    return ['--experimental-wasm-exnref']
  return []


@memoize
@ToolchainProfiler.profile()
def check_node():
  try:
    utils.run_process(config.NODE_JS + ['-e', 'console.log("hello")'], stdout=PIPE)
  except Exception as e:
    exit_with_error('the configured node executable (%s) does not seem to work, check the paths in %s (%s)', config.NODE_JS, config.EM_CONFIG, e)


def generate_sanity():
  return f'{utils.EMSCRIPTEN_VERSION}|{config.LLVM_ROOT}\n'


@memoize
def perform_sanity_checks(quiet=False):
  # some warning, mostly not fatal checks - do them even if EM_IGNORE_SANITY is on
  check_node_version()
  check_llvm_version()

  llvm_ok = check_llvm()

  if os.environ.get('EM_IGNORE_SANITY'):
    logger.info('EM_IGNORE_SANITY set, ignoring sanity checks')
    return

  if not quiet:
    logger.info('(Emscripten: Running sanity checks)')

  if not llvm_ok:
    exit_with_error('failing sanity checks due to previous llvm failure')

  check_node()

  with ToolchainProfiler.profile_block('sanity LLVM'):
    for cmd in (CLANG_CC, LLVM_AR):
      if not os.path.exists(cmd) and not os.path.exists(cmd + '.exe'):  # .exe extension required for Windows
        exit_with_error('cannot find %s, check the paths in %s', cmd, config.EM_CONFIG)


@ToolchainProfiler.profile()
def check_sanity(force=False, quiet=False):
  """Check that basic stuff we need (Node.js, and Clang and LLVM) exists.

  The test runner always does this check (through |force|). emcc does this less
  frequently, only when ${EM_CONFIG}_sanity does not exist or is older than
  EM_CONFIG (so, we re-check sanity when the settings are changed).  We also
  re-check sanity and clear the cache when the version changes.
  """
  if not force and os.environ.get('EMCC_SKIP_SANITY_CHECK') == '1':
    return

  # We set EMCC_SKIP_SANITY_CHECK so that any subprocesses that we launch will
  # not re-run the tests.
  os.environ['EMCC_SKIP_SANITY_CHECK'] = '1'

  # In DEBUG mode we perform the sanity checks even when
  # early return due to the file being up-to-date.
  if DEBUG:
    force = True

  if config.FROZEN_CACHE:
    if force:
      perform_sanity_checks(quiet)
    return

  if os.environ.get('EM_IGNORE_SANITY'):
    perform_sanity_checks(quiet)
    return

  expected = generate_sanity()

  sanity_file = cache.get_path('sanity.txt')

  def sanity_is_correct():
    sanity_data = None
    # We can't simply check for the existence of sanity_file and then read from
    # it here because we don't hold the cache lock yet and some other process
    # could clear the cache between checking for, and reading from, the file.
    with contextlib.suppress(Exception):
      sanity_data = utils.read_file(sanity_file)
    if sanity_data == expected:
      logger.debug(f'sanity file up-to-date: {sanity_file}')
      # Even if the sanity file is up-to-date we still run the checks
      # when force is set.
      if force:
        perform_sanity_checks(quiet)
      return True # all is well
    return False

  if sanity_is_correct():
    # Early return without taking the cache lock
    return

  with cache.lock('sanity'):
    # Check again once the cache lock as acquired
    if sanity_is_correct():
      return

    if os.path.exists(sanity_file):
      sanity_data = utils.read_file(sanity_file)
      logger.info('old sanity: %s', sanity_data.strip())
      logger.info('new sanity: %s', expected.strip())
      logger.info('(Emscripten: config changed, clearing cache)')
      cache.erase()
    else:
      logger.debug(f'sanity file not found: {sanity_file}')

    perform_sanity_checks()

    # Only create/update this file if the sanity check succeeded, i.e., we got here
    utils.write_file(sanity_file, expected)


def llvm_tool_path_with_suffix(tool, suffix):
  if suffix:
    tool += '-' + suffix
  llvm_root = os.path.expanduser(config.LLVM_ROOT)
  return utils.find_exe(llvm_root, tool)


# Some distributions ship with multiple llvm versions so they add
# the version to the binaries, cope with that
def llvm_tool_path(tool):
  return llvm_tool_path_with_suffix(tool, config.LLVM_ADD_VERSION)


# Some distributions ship with multiple clang versions so they add
# the version to the binaries, cope with that
def clang_tool_path(tool):
  return llvm_tool_path_with_suffix(tool, config.CLANG_ADD_VERSION)


# In MINIMAL_RUNTIME mode, keep suffixes of generated files simple
# ('.mem' instead of '.js.mem'; .'symbols' instead of '.js.symbols' etc)
# Retain the original naming scheme in traditional runtime.
def replace_or_append_suffix(filename, new_suffix):
  assert new_suffix[0] == '.'
  return utils.replace_suffix(filename, new_suffix) if settings.MINIMAL_RUNTIME else filename + new_suffix


# Temp dir. Create a random one, unless EMCC_DEBUG is set, in which case use the canonical
# temp directory (TEMP_DIR/emscripten_temp).
@memoize
def get_emscripten_temp_dir():
  """Return path of EMSCRIPTEN_TEMP_DIR, creating one if it didn't exist."""
  global EMSCRIPTEN_TEMP_DIR
  if not EMSCRIPTEN_TEMP_DIR:
    EMSCRIPTEN_TEMP_DIR = tempfile.mkdtemp(prefix='emscripten_temp_', dir=TEMP_DIR)

    if not DEBUG_SAVE:
      def prepare_to_clean_temp(d):
        def clean_temp():
          utils.delete_dir(d)

        atexit.register(clean_temp)
      # this global var might change later
      prepare_to_clean_temp(EMSCRIPTEN_TEMP_DIR)
  return EMSCRIPTEN_TEMP_DIR


def in_temp(name):
  return os.path.join(get_emscripten_temp_dir(), os.path.basename(name))


def get_canonical_temp_dir(temp_dir):
  return os.path.join(temp_dir, 'emscripten_temp')


def setup_temp_dirs():
  global EMSCRIPTEN_TEMP_DIR, CANONICAL_TEMP_DIR, TEMP_DIR
  EMSCRIPTEN_TEMP_DIR = None

  TEMP_DIR = os.environ.get("EMCC_TEMP_DIR", tempfile.gettempdir())
  if not os.path.isdir(TEMP_DIR):
    exit_with_error(f'The temporary directory `{TEMP_DIR}` does not exist! Please make sure that the path is correct.')

  CANONICAL_TEMP_DIR = get_canonical_temp_dir(TEMP_DIR)

  if DEBUG:
    EMSCRIPTEN_TEMP_DIR = CANONICAL_TEMP_DIR
    try:
      safe_ensure_dirs(EMSCRIPTEN_TEMP_DIR)
    except Exception as e:
      exit_with_error('error creating canonical temp dir (Check definition of TEMP_DIR in %s): %s', config.EM_CONFIG, e)

    # Since the canonical temp directory is, by definition, the same
    # between all processes that run in DEBUG mode we need to use a multi
    # process lock to prevent more than one process from writing to it.
    # This is because emcc assumes that it can use non-unique names inside
    # the temp directory.
    # Sadly we need to allow child processes to access this directory
    # though, since emcc can recursively call itself when building
    # libraries and ports.
    if 'EM_HAVE_TEMP_DIR_LOCK' not in os.environ:
      filelock_name = os.path.join(EMSCRIPTEN_TEMP_DIR, 'emscripten.lock')
      lock = filelock.FileLock(filelock_name)
      os.environ['EM_HAVE_TEMP_DIR_LOCK'] = '1'
      lock.acquire()
      atexit.register(lock.release)


@memoize
def get_temp_files():
  if DEBUG_SAVE:
    # In debug mode store all temp files in the emscripten-specific temp dir
    # and don't worry about cleaning them up.
    return tempfiles.TempFiles(get_emscripten_temp_dir(), save_debug_files=True)
  else:
    # Otherwise use the system tempdir and try to clean up after ourselves.
    return tempfiles.TempFiles(TEMP_DIR, save_debug_files=False)


def print_compiler_stage(cmd):
  """Emulate the '-v/-###' flags of clang/gcc by printing the sub-commands that we run."""

  def maybe_quote(arg):
    if all(c.isalnum() or c in './-_' for c in arg):
      return arg
    else:
      return f'"{arg}"'

  if SKIP_SUBPROCS:
    print(' ' + ' '.join([maybe_quote(a) for a in cmd]), file=sys.stderr)
    sys.stderr.flush()
  elif PRINT_SUBPROCS:
    print(' %s %s' % (maybe_quote(cmd[0]), shlex.join(cmd[1:])), file=sys.stderr)
    sys.stderr.flush()


def demangle_c_symbol_name(name):
  if not is_c_symbol(name):
    return '$' + name
  return name[1:] if name.startswith('_') else name


def is_c_symbol(name):
  return name.startswith('_')


def is_internal_global(name):
  internal_start_stop_symbols = {'__start_em_asm', '__stop_em_asm',
                                 '__start_em_js', '__stop_em_js',
                                 '__start_em_lib_deps', '__stop_em_lib_deps',
                                 '__em_lib_deps'}
  internal_prefixes = ('__em_js__', '__em_lib_deps')
  return name in internal_start_stop_symbols or any(name.startswith(p) for p in internal_prefixes)


def is_user_export(name):
  if is_internal_global(name):
    return False
  return name not in {'__asyncify_data', '__asyncify_state', '__indirect_function_table', 'memory'} and not name.startswith(('dynCall_', 'orig$'))


def asmjs_mangle(name):
  """Mangle a name the way asm.js/JSBackend globals are mangled.

  Prepends '_' and replaces non-alphanumerics with '_'.
  Used by wasm backend for JS library consistency with asm.js.
  """
  # We also use this function to convert the clang-mangled `__main_argc_argv`
  # to simply `main` which is expected by the emscripten JS glue code.
  if name == '__main_argc_argv':
    name = 'main'
  if is_user_export(name):
    return '_' + name
  return name


def do_replace(input_, pattern, replacement):
  if pattern not in input_:
    exit_with_error('expected to find pattern in input JS: %s' % pattern)
  return input_.replace(pattern, replacement)


def get_llvm_target():
  if settings.MEMORY64:
    return 'wasm64-unknown-emscripten'
  else:
    return 'wasm32-unknown-emscripten'


def init():
  utils.set_version_globals()
  setup_temp_dirs()


# ============================================================================
# End declarations.
# ============================================================================

# Everything below this point is top level code that get run when importing this
# file.  TODO(sbc): We should try to reduce that amount we do here and instead
# have consumers explicitly call initialization functions.

CLANG_CC = clang_tool_path('clang')
CLANG_CXX = clang_tool_path('clang++')
CLANG_SCAN_DEPS = llvm_tool_path('clang-scan-deps')
LLVM_AR = llvm_tool_path('llvm-ar')
LLVM_DWP = llvm_tool_path('llvm-dwp')
LLVM_RANLIB = llvm_tool_path('llvm-ranlib')
LLVM_NM = llvm_tool_path('llvm-nm')
LLVM_DWARFDUMP = llvm_tool_path('llvm-dwarfdump')
LLVM_OBJCOPY = llvm_tool_path('llvm-objcopy')
WASM_LD = llvm_tool_path('wasm-ld')
LLVM_PROFDATA = llvm_tool_path('llvm-profdata')
LLVM_COV = llvm_tool_path('llvm-cov')

EMCC = exe_path_from_root('emcc')
EMXX = exe_path_from_root('em++')
EMAR = exe_path_from_root('emar')
EMRANLIB = exe_path_from_root('emranlib')
FILE_PACKAGER = exe_path_from_root('tools/file_packager')
# Windows .dll suffix is not included in this list, since those are never
# linked to directly on the command line.
DYLIB_EXTENSIONS = ['.dylib', '.so']

run_via_emxx = False

init()