#!/usr/bin/env python3
# Copyright 2012 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.

"""Chunked driver for acorn-optimizer.mjs.

The function section of a JS file (delimited by the EMSCRIPTEN_*_FUNCS
markers) is split into chunks, each chunk is run through acorn-optimizer.mjs
in a separate process, and the results are stitched back together with the
surrounding shell code.
"""

import json
import os
import re
import shutil
import subprocess
import sys

__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
__rootdir__ = os.path.dirname(__scriptdir__)
sys.path.insert(0, __rootdir__)

from tools import building, config, shared, utils
from tools.toolchain_profiler import ToolchainProfiler
from tools.utils import path_from_root

temp_files = shared.get_temp_files()

ACORN_OPTIMIZER = path_from_root('tools/acorn-optimizer.mjs')

NUM_CHUNKS_PER_CORE = 3
# configuring this is just for debugging purposes
MIN_CHUNK_SIZE = int(os.environ.get('EMCC_JSOPT_MIN_CHUNK_SIZE') or 512 * 1024)
MAX_CHUNK_SIZE = int(os.environ.get('EMCC_JSOPT_MAX_CHUNK_SIZE') or 5 * 1024 * 1024)

WINDOWS = sys.platform.startswith('win')

DEBUG = os.environ.get('EMCC_DEBUG')

func_sig = re.compile(r'function ([_\w$]+)\(')
func_sig_json = re.compile(r'\["defun", ?"([_\w$]+)",')


def get_acorn_cmd():
    """Return the command prefix (node + flags + optimizer script) as a new list.

    The configured node command is copied before the stack-size flag is
    added, so the shared `config.NODE_JS` list is never modified.
    """
    node = list(config.NODE_JS)
    if not any('--stack-size' in arg for arg in node):
        # Use an 8Mb stack (rather than the ~1Mb default) when running the
        # js optimizer since larger inputs can cause terser to use a lot of stack.
        node.append('--stack-size=8192')
    return node + [ACORN_OPTIMIZER]


def split_funcs(js):
    """Split `js` into a list of `(ident, text)` pairs, one per named function.

    The text is cut at every occurrence of 'function ', so the result does not
    depend on newlines. Pieces in which `func_sig` finds no function name are
    dropped, as is any text before the first 'function '.
    """
    # split properly even if there are no newlines,
    # which is important for deterministic builds (as which functions
    # are in each chunk may differ, so we need to split them up and combine
    # them all together later and sort them deterministically)
    funcs = []
    for part in js.split('function ')[1:]:
        func = 'function ' + part
        m = func_sig.search(func)
        if not m:
            continue
        ident = m.group(1)
        assert ident
        funcs.append((ident, func))
    return funcs


class Minifier:
    """Minification support.

    We calculate minification of globals here, then pass that into the
    parallel acorn-optimizer.mjs runners which perform minification of locals.
    """

    def __init__(self, js):
        self.js = js
        # Path to write the symbol map to, or None to skip writing it.
        self.symbols_file = None
        # When true, no function names are collected as globals.
        self.profiling_funcs = False
        # Initialised here so serialize() is usable before minify_shell().
        self.globs = []

    def minify_shell(self, shell, minify_whitespace):
        """Minify the global names in `shell` and return the resulting code.

        Runs acorn-optimizer.mjs with the 'minifyGlobals' pass over `shell`,
        followed by an '// EXTRA_INFO:' line carrying the collected globals.
        The metadata that comes back after '// EXTRA_INFO:' in the output
        replaces `self.globs`; if `self.symbols_file` is set, one `value:key`
        line per entry of that metadata is written to it.

        `minify_whitespace` adds '--minify-whitespace' to the command.
        """
        # Run through acorn-optimizer.mjs to find and minify the global symbols
        # We send it the globals, which it parses at the proper time. JS decides how
        # to minify all global names, we receive a dictionary back, which is then
        # used by the function processors

        shell = shell.replace('0.0', '13371337')  # avoid optimizer doing 0.0 => 0

        # Find all globals in the JS functions code
        if not self.profiling_funcs:
            self.globs = [m.group(1) for m in func_sig.finditer(self.js)]
            if not self.globs:
                self.globs = [m.group(1) for m in func_sig_json.finditer(self.js)]
        else:
            self.globs = []

        with temp_files.get_file('.minifyglobals.js') as temp_file:
            with open(temp_file, 'w', encoding='utf-8') as f:
                f.write(shell)
                f.write('\n')
                f.write('// EXTRA_INFO:' + json.dumps(self.serialize()))

            cmd = get_acorn_cmd() + [temp_file, 'minifyGlobals']
            if minify_whitespace:
                cmd.append('--minify-whitespace')
            output = utils.run_process(cmd, stdout=subprocess.PIPE).stdout

        assert len(output) and not output.startswith('Assertion failed'), 'Error in js optimizer: ' + output
        code, metadata = output.split('// EXTRA_INFO:')
        self.globs = json.loads(metadata)

        if self.symbols_file:
            mapping = '\n'.join(f'{value}:{key}' for key, value in self.globs.items())
            utils.write_file(self.symbols_file, mapping + '\n')
            print('wrote symbol map file to', self.symbols_file, file=sys.stderr)

        # Undo the placeholder substitution made at the top.
        return code.replace('13371337', '0.0')

    def serialize(self):
        """Return the JSON-serializable info passed along as EXTRA_INFO."""
        return {
            'globals': self.globs,
        }


start_funcs_marker = '// EMSCRIPTEN_START_FUNCS\n'
end_funcs_marker = '// EMSCRIPTEN_END_FUNCS\n'
start_asm_marker = '// EMSCRIPTEN_START_ASM\n'
end_asm_marker = '// EMSCRIPTEN_END_ASM\n'


# Given a set of functions of form (ident, text), and a preferred chunk size,
# generates a set of chunks for parallel processing and caching.
@ToolchainProfiler.profile()
def chunkify(funcs, chunk_size):
    """Group consecutive functions into chunks and return each chunk's text.

    Functions are added to the current chunk while the combined text length
    stays below `chunk_size`; otherwise a new chunk is started with that
    function. A function at or above `chunk_size` therefore ends up alone at
    the start of its own chunk. Returns a list of strings; the function
    names are dropped. No empty chunks are produced.
    """
    chunks = []
    curr = []
    total_size = 0
    for func in funcs:
        curr_size = len(func[1])
        if total_size + curr_size < chunk_size:
            curr.append(func)
            total_size += curr_size
        else:
            # `curr` is only empty before the first function has been placed;
            # do not emit an empty chunk in that case.
            if curr:
                chunks.append(curr)
            curr = [func]
            total_size = curr_size
    if curr:
        chunks.append(curr)
    return [''.join(func[1] for func in chunk) for chunk in chunks]  # remove function names


@ToolchainProfiler.profile_block('js_optimizer.run_on_file')
def run_on_file(filename, passes, extra_info=None):
    """Optimize the function section of `filename` and return the output path.

    Args:
      filename: JS file containing the EMSCRIPTEN_START_FUNCS/END_FUNCS
        markers; exits with an error if they are missing or out of order.
      passes: a pass name or a list of pass names. Handled here rather than
        forwarded as-is: 'minifyNames' (globals are minified via `Minifier`
        and the chunks get 'minifyLocals' instead), 'closure' and 'cleanup'
        (run on the shell code around the functions) and, only when
        'minifyNames' is present, 'symbolMap=<path>' and 'profilingFuncs'.
        All remaining entries are passed to acorn-optimizer.mjs per chunk.
      extra_info: optional dict appended to every chunk as JSON after
        '// EXTRA_INFO:'. Asserted to be empty when 'minifyNames' is used.

    Returns:
      `filename + '.jo.js'`, the file the result was written to. It is
      registered with `temp_files.note`.
    """
    with ToolchainProfiler.profile_block('js_optimizer.split_markers'):
        if not isinstance(passes, list):
            passes = [passes]

        js = utils.read_file(filename)
        if os.linesep != '\n':
            js = js.replace(os.linesep, '\n')  # we assume \n in the splitting code

        # Find markers
        start_funcs = js.find(start_funcs_marker)
        end_funcs = js.rfind(end_funcs_marker)

        if start_funcs < 0 or end_funcs < start_funcs:
            utils.exit_with_error('invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)' % (start_funcs, end_funcs))

        minify_globals = 'minifyNames' in passes
        if minify_globals:
            passes = [p if p != 'minifyNames' else 'minifyLocals' for p in passes]
            start_asm = js.find(start_asm_marker)
            end_asm = js.rfind(end_asm_marker)
            assert (start_asm >= 0) == (end_asm >= 0)

        closure = 'closure' in passes
        if closure:
            passes = [p for p in passes if p != 'closure']  # we will do it manually

        cleanup = 'cleanup' in passes
        if cleanup:
            passes = [p for p in passes if p != 'cleanup']  # we will do it manually

    if not minify_globals:
        with ToolchainProfiler.profile_block('js_optimizer.no_minify_globals'):
            pre = js[:start_funcs + len(start_funcs_marker)]
            post = js[end_funcs + len(end_funcs_marker):]
            js = js[start_funcs + len(start_funcs_marker):end_funcs]
            # can have Module[..] and inlining prevention code, push those to post
            finals = []

            def process(line):
                # Returns False (and records the line) for lines moved to post.
                if line and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')):
                    finals.append(line)
                    return False
                return True

            js = '\n'.join(line for line in js.split('\n') if process(line))
            post = '\n'.join(finals) + '\n' + post
        post = end_funcs_marker + post
    else:
        with ToolchainProfiler.profile_block('js_optimizer.minify_globals'):
            # We need to split out the asm shell as well, for minification
            pre = js[:start_asm + len(start_asm_marker)]
            post = js[end_asm:]
            # The functions are replaced by a placeholder call that the
            # minified shell is split on again below.
            asm_shell = (js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] +
                         '\nEMSCRIPTEN_FUNCS();\n' +
                         js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)])
            js = js[start_funcs + len(start_funcs_marker):end_funcs]

            # we assume there is a maximum of one new name per line
            minifier = Minifier(js)

            def check_symbol_mapping(p):
                # Consumes the minifier-only options; returns False for them
                # so they are not forwarded to the per-chunk optimizer runs.
                if p.startswith('symbolMap='):
                    minifier.symbols_file = p.split('=', 1)[1]
                    return False
                if p == 'profilingFuncs':
                    minifier.profiling_funcs = True
                    return False
                return True

            passes = [p for p in passes if check_symbol_mapping(p)]
            asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, '--minify-whitespace' in passes).split('EMSCRIPTEN_FUNCS();')
            asm_shell_post = asm_shell_post.replace('});', '})')
            pre += asm_shell_pre + '\n' + start_funcs_marker
            post = end_funcs_marker + asm_shell_post + post

            minify_info = minifier.serialize()

            # NOTE(review): extra_info is asserted to be empty further down
            # whenever minify_globals is set, so this merge only takes effect
            # when assertions are disabled.
            if extra_info:
                for key, value in extra_info.items():
                    assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]]
                    minify_info[key] = value

    with ToolchainProfiler.profile_block('js_optimizer.split'):
        total_size = len(js)
        funcs = split_funcs(js)
        js = None

    with ToolchainProfiler.profile_block('js_optimizer.split_to_chunks'):
        # Aim for NUM_CHUNKS_PER_CORE chunks per core, with the chunk size
        # clamped to [MIN_CHUNK_SIZE, MAX_CHUNK_SIZE].
        intended_num_chunks = round(utils.get_num_cores() * NUM_CHUNKS_PER_CORE)
        chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
        chunks = chunkify(funcs, chunk_size)

        chunks = [chunk for chunk in chunks if chunk]
        if DEBUG:
            lengths = [len(c) for c in chunks]
            if not lengths:
                lengths = [0]
            print('chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(lengths), '-', min(lengths), file=sys.stderr)
        funcs = None

        serialized_extra_info = ''
        if minify_globals:
            assert not extra_info
            serialized_extra_info += '// EXTRA_INFO:' + json.dumps(minify_info)
        elif extra_info:
            serialized_extra_info += '// EXTRA_INFO:' + json.dumps(extra_info)
        with ToolchainProfiler.profile_block('js_optimizer.write_chunks'):
            def write_chunk(chunk, i):
                temp_file = temp_files.get('.jsfunc_%d.js' % i).name
                utils.write_file(temp_file, chunk + serialized_extra_info)
                return temp_file

            filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

    with ToolchainProfiler.profile_block('run_optimizer'):
        commands = [get_acorn_cmd() + [f] + passes for f in filenames]
        # From here on `filenames` holds what run_multiple_processes returns;
        # each entry is read back as an optimized chunk below.
        filenames = shared.run_multiple_processes(commands, route_stdout_to_temp_files_suffix='js_opt.jo.js')

    with ToolchainProfiler.profile_block('split_closure_cleanup'):
        if closure or cleanup:
            # run on the shell code, everything but what we acorn-optimize
            cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(wakaGlobal,wakaEnv,wakaBuffer)\n'

            with temp_files.get_file('.cl.js') as cle:
                pre_1, pre_2 = pre.split(start_asm_marker)
                post_1, post_2 = post.split(end_asm_marker)
                with open(cle, 'w', encoding='utf-8') as f:
                    f.write(pre_1)
                    f.write(cl_sep)
                    f.write(post_2)
                cld = cle
                if closure:
                    if DEBUG:
                        print('running closure on shell code', file=sys.stderr)
                    cld = building.closure_compiler(cld, pretty='--minify-whitespace' not in passes)
                    temp_files.note(cld)
                elif cleanup:
                    if DEBUG:
                        print('running cleanup on shell code', file=sys.stderr)
                    acorn_passes = ['JSDCE']
                    if '--minify-whitespace' in passes:
                        acorn_passes.append('--minify-whitespace')
                    cld = building.acorn_optimizer(cld, acorn_passes)
                    temp_files.note(cld)
                coutput = utils.read_file(cld)

            coutput = coutput.replace('wakaUnknownBefore();', start_asm_marker)
            after = 'wakaUnknownAfter'
            start = coutput.find(after)
            end = coutput.find(')', start)
            # If the closure comment to suppress useless code is present, we need to look one
            # brace past it, as the first is in there. Otherwise, the first brace is the
            # start of the function body (what we want).
            USELESS_CODE_COMMENT = '/** @suppress {uselessCode} */ '
            USELESS_CODE_COMMENT_BODY = 'uselessCode'
            brace = pre_2.find('{') + 1
            has_useless_code_comment = False
            if pre_2[brace:brace + len(USELESS_CODE_COMMENT_BODY)] == USELESS_CODE_COMMENT_BODY:
                brace = pre_2.find('{', brace) + 1
                has_useless_code_comment = True
            pre = (coutput[:start] + '(' +
                   (USELESS_CODE_COMMENT if has_useless_code_comment else '') +
                   'function(global,env,buffer) {\n' + pre_2[brace:])
            post = post_1 + end_asm_marker + coutput[end + 1:]

    filename += '.jo.js'
    temp_files.note(filename)

    with open(filename, 'w', encoding='utf-8') as f:
        with ToolchainProfiler.profile_block('write_pre'):
            f.write(pre)
            pre = None

        with ToolchainProfiler.profile_block('sort_or_concat'):
            # sort functions by size, to make diffing easier and to improve aot times
            funcses = [split_funcs(utils.read_file(out_file)) for out_file in filenames]
            funcs = [item for sublist in funcses for item in sublist]
            funcses = None
            if not os.environ.get('EMCC_NO_OPT_SORT'):
                # Largest first; ties broken by name so the order is deterministic.
                funcs.sort(key=lambda x: (len(x[1]), x[0]), reverse=True)

            for func in funcs:
                f.write(func[1])
            funcs = None

        with ToolchainProfiler.profile_block('write_post'):
            f.write('\n')
            f.write(post)
            f.write('\n')

    return filename


def main():
    """Command-line entry point: `<file.js> [passes...] [extra-info-json]`.

    A trailing argument containing '{' is parsed as JSON and passed as
    `extra_info`. The result of `run_on_file` is copied to
    `<file.js>.jsopt.js`. Returns the process exit code.
    """
    args = sys.argv[1:]
    if not args:
        print(f'usage: {sys.argv[0]} <file.js> [passes...] [extra-info-json]', file=sys.stderr)
        return 1

    extra_info = None
    # The first argument is always the input file, never the JSON blob.
    if len(args) > 1 and '{' in args[-1]:
        extra_info = json.loads(args.pop())

    out = run_on_file(args[0], args[1:], extra_info=extra_info)
    shutil.copyfile(out, args[0] + '.jsopt.js')
    return 0


if __name__ == '__main__':
    sys.exit(main())