| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | from __future__ import absolute_import |
| | from __future__ import print_function |
| | from __future__ import unicode_literals |
| | from argparse import ArgumentParser |
| | from contextlib import contextmanager |
| | from contextlib import closing |
| | from functools import partial |
| | import gettext |
| | import io |
| | import logging |
| | import os.path |
| | import shutil |
| | import shutil |
| | import sys |
| | import base64 |
| | import re |
| | import tempfile |
| | import mimetypes |
| |
|
| | from . import __version__ as version |
| | from .cli import init_logger |
| | from .transforms import BaseTransform |
| | from .utils import cached_property |
| |
|
| |
|
| | PY3 = sys.version_info.major == 3 |
| | logger = logging.getLogger(__name__) |
| | locale_dir = os.path.join(os.path.dirname(__file__), 'locale') |
| | locale_dir = os.path.abspath(locale_dir) |
| | t = gettext.translation('hwp5html', locale_dir, fallback=True) |
| | _ = t.gettext |
| |
|
| |
|
| | RESOURCE_PATH_XSL_CSS = 'xsl/hwp5css.xsl' |
| | RESOURCE_PATH_XSL_XHTML = 'xsl/hwp5html.xsl' |
| |
|
| |
|
| | class HTMLTransform(BaseTransform): |
| |
|
| | @property |
| | def transform_hwp5_to_css(self): |
| | ''' |
| | >>> T.transform_hwp5_to_css(hwp5file, 'styles.css') |
| | ''' |
| | transform_xhwp5 = self.transform_xhwp5_to_css |
| | return self.make_transform_hwp5(transform_xhwp5) |
| |
|
| | @property |
| | def transform_hwp5_to_xhtml(self): |
| | ''' |
| | >>> T.transform_hwp5_to_xhtml(hwp5file, 'index.xhtml') |
| | ''' |
| | transform_xhwp5 = self.transform_xhwp5_to_xhtml |
| | return self.make_transform_hwp5(transform_xhwp5) |
| |
|
| | def transform_hwp5_to_dir(self, hwp5file, outdir): |
| | ''' |
| | >>> T.transform_hwp5_to_dir(hwp5file, 'output') |
| | ''' |
| | with self.transformed_xhwp5_at_temp(hwp5file) as xhwp5path: |
| | self.transform_xhwp5_to_dir(xhwp5path, outdir) |
| |
|
| | bindata_dir = os.path.join(outdir, 'bindata') |
| | self.extract_bindata_dir(hwp5file, bindata_dir) |
| |
|
| | @cached_property |
| | def transform_xhwp5_to_css(self): |
| | ''' |
| | >>> T.transform_xhwp5_to_css('hwp5.xml', 'styles.css') |
| | ''' |
| | resource_path = RESOURCE_PATH_XSL_CSS |
| | return self.make_xsl_transform(resource_path) |
| |
|
| | @cached_property |
| | def transform_xhwp5_to_xhtml(self): |
| | ''' |
| | >>> T.transform_xhwp5_to_xhtml('hwp5.xml', 'index.xhtml') |
| | ''' |
| | resource_path = RESOURCE_PATH_XSL_XHTML |
| | return self.make_xsl_transform(resource_path) |
| |
|
| | def transform_xhwp5_to_dir(self, xhwp5path, outdir): |
| | ''' |
| | >>> T.transform_xhwp5_to_dir('hwp5.xml', 'output') |
| | ''' |
| | html_path = os.path.join(outdir, 'index.xhtml') |
| | with io.open(html_path, 'wb') as f: |
| | self.transform_xhwp5_to_xhtml(xhwp5path, f) |
| |
|
| | css_path = os.path.join(outdir, 'styles.css') |
| | with io.open(css_path, 'wb') as f: |
| | self.transform_xhwp5_to_css(xhwp5path, f) |
| |
|
| | def transform_hwp5_to_single(self, hwp5file, outpath): |
| | """ |
| | Convert HWP file to a single HTML file with embedded CSS and images. |
| | """ |
| | |
| | with tempfile.TemporaryDirectory() as temp_dir: |
| | |
| | self.transform_hwp5_to_dir(hwp5file, temp_dir) |
| | |
| | |
| | html_path = os.path.join(temp_dir, 'index.xhtml') |
| | css_path = os.path.join(temp_dir, 'styles.css') |
| | bindata_dir = os.path.join(temp_dir, 'bindata') |
| | |
| | |
| | if os.path.exists(html_path): |
| | with io.open(html_path, 'r', encoding='utf-8') as f: |
| | html_content = f.read() |
| | else: |
| | raise RuntimeError("HTML generation failed") |
| | |
| | css_content = "" |
| | if os.path.exists(css_path): |
| | with io.open(css_path, 'r', encoding='utf-8') as f: |
| | css_content = f.read() |
| | |
| | |
| | |
| | if css_content: |
| | style_tag = f'<style>\n{css_content}\n</style>\n' |
| | html_content = html_content.replace('</head>', f'{style_tag}</head>') |
| | |
| | |
| | html_content = re.sub(r'<link[^>]+href="styles.css"[^>]*/>', '', html_content) |
| |
|
| | |
| | if os.path.exists(bindata_dir): |
| | |
| | def replace_image(match): |
| | src = match.group(1) |
| | if src.startswith('bindata/'): |
| | image_filename = os.path.basename(src) |
| | image_path = os.path.join(bindata_dir, image_filename) |
| | if os.path.exists(image_path): |
| | |
| | mime_type, _ = mimetypes.guess_type(image_path) |
| | if not mime_type: |
| | mime_type = 'image/png' |
| | |
| | with open(image_path, 'rb') as img_f: |
| | img_data = img_f.read() |
| | b64_data = base64.b64encode(img_data).decode('ascii') |
| | return f'src="data:{mime_type};base64,{b64_data}"' |
| | return match.group(0) |
| | |
| | |
| | |
| | html_content = re.sub(r'src="(bindata/[^"]+)"', replace_image, html_content) |
| | |
| | |
| | with io.open(outpath, 'w', encoding='utf-8') as f: |
| | f.write(html_content) |
| |
|
| | def extract_bindata_dir(self, hwp5file, bindata_dir): |
| | if 'BinData' not in hwp5file: |
| | return |
| | bindata_stg = hwp5file['BinData'] |
| | if not os.path.exists(bindata_dir): |
| | os.mkdir(bindata_dir) |
| |
|
| | from hwp5.storage import unpack |
| | unpack(bindata_stg, bindata_dir) |
| |
|
| |
|
| | def main(): |
| | from .dataio import ParseError |
| | from .errors import InvalidHwp5FileError |
| | from .utils import make_open_dest_file |
| | from .xmlmodel import Hwp5File |
| |
|
| | argparser = main_argparser() |
| | args = argparser.parse_args() |
| | init_logger(args) |
| |
|
| | hwp5path = args.hwp5file |
| |
|
| | html_transform = HTMLTransform() |
| |
|
| | open_dest = make_open_dest_file(args.output) |
| | if args.css: |
| | transform = html_transform.transform_hwp5_to_css |
| | open_dest = wrap_for_css(open_dest) |
| | elif args.html: |
| | transform = html_transform.transform_hwp5_to_xhtml |
| | open_dest = wrap_for_xml(open_dest) |
| | elif args.embed_image: |
| | transform = html_transform.transform_hwp5_to_single |
| | |
| | |
| | if not args.output: |
| | args.output = os.path.splitext(os.path.basename(hwp5path))[0] + '.html' |
| | open_dest = lambda: contextmanager(lambda: (yield args.output))() |
| | else: |
| | transform = html_transform.transform_hwp5_to_dir |
| | dest_path = args.output |
| | if not dest_path: |
| | dest_path = os.path.splitext(os.path.basename(hwp5path))[0] |
| | open_dest = partial(open_dir, dest_path) |
| |
|
| | print(f"DEBUG: Input file: {hwp5path}") |
| | print(f"DEBUG: Args: css={args.css}, html={args.html}, embed_image={getattr(args, 'embed_image', False)}") |
| |
|
| | try: |
| | with closing(Hwp5File(hwp5path)) as hwp5file: |
| | with open_dest() as dest: |
| | print(f"DEBUG: Starting transformation using {transform}") |
| | transform(hwp5file, dest) |
| | print("DEBUG: Transformation finished") |
| | except Exception as e: |
| | import traceback |
| | traceback.print_exc() |
| | logger.error('%s', e) |
| | sys.exit(1) |
| |
|
| |
|
| | def main_argparser(): |
| | parser = ArgumentParser( |
| | prog='hwp5html', |
| | description=_('HWPv5 to HTML converter'), |
| | ) |
| | parser.add_argument( |
| | '--version', |
| | action='version', |
| | version='%(prog)s {}'.format(version) |
| | ) |
| | parser.add_argument( |
| | '--loglevel', |
| | help=_('Set log level.'), |
| | ) |
| | parser.add_argument( |
| | '--logfile', |
| | help=_('Set log file.'), |
| | ) |
| | parser.add_argument( |
| | '--output', |
| | help=_('Output file'), |
| | ) |
| | parser.add_argument( |
| | 'hwp5file', |
| | metavar='<hwp5file>', |
| | help=_('.hwp file to convert'), |
| | ) |
| | generator_group = parser.add_mutually_exclusive_group() |
| | generator_group.add_argument( |
| | '--css', |
| | action='store_true', |
| | help=_('Generate CSS'), |
| | ) |
| | generator_group.add_argument( |
| | '--html', |
| | action='store_true', |
| | help=_('Generate HTML'), |
| | ) |
| | generator_group.add_argument( |
| | '--embed-image', |
| | action='store_true', |
| | help=_('Embed images and CSS into a single HTML file'), |
| | ) |
| | return parser |
| |
|
| |
|
| | @contextmanager |
| | def open_dir(path): |
| | if os.path.exists(path): |
| | shutil.rmtree(path) |
| | os.mkdir(path) |
| | yield path |
| |
|
| |
|
| | def wrap_for_css(open_dest): |
| | from .utils import wrap_open_dest_for_tty |
| | from .utils import pager |
| | from .utils import syntaxhighlight |
| | return wrap_open_dest_for_tty(open_dest, [ |
| | pager(), |
| | syntaxhighlight('text/css'), |
| | ]) |
| |
|
| |
|
| | def wrap_for_xml(open_dest): |
| | from .utils import wrap_open_dest_for_tty |
| | from .utils import pager |
| | from .utils import syntaxhighlight |
| | from .utils import xmllint |
| | return wrap_open_dest_for_tty(open_dest, [ |
| | pager(), |
| | syntaxhighlight('application/xml'), |
| | xmllint(format=True, nonet=True), |
| | ]) |
| |
|
| |
|
| | if __name__ == '__main__': |
| | main() |
| |
|