|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import |
|
|
from __future__ import print_function |
|
|
from __future__ import unicode_literals |
|
|
from argparse import ArgumentParser |
|
|
from contextlib import contextmanager |
|
|
from contextlib import closing |
|
|
from functools import partial |
|
|
import gettext |
|
|
import io |
|
|
import logging |
|
|
import os.path |
|
|
import shutil |
|
|
import shutil |
|
|
import sys |
|
|
import base64 |
|
|
import re |
|
|
import tempfile |
|
|
import mimetypes |
|
|
|
|
|
from . import __version__ as version |
|
|
from .cli import init_logger |
|
|
from .transforms import BaseTransform |
|
|
from .utils import cached_property |
|
|
|
|
|
|
|
|
# True when the running interpreter is Python 3.
PY3 = sys.version_info[0] == 3

logger = logging.getLogger(__name__)

# Absolute path of the ``locale`` directory located next to this module.
locale_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), 'locale')
)

# fallback=True makes gettext return a pass-through translator instead of
# raising when no 'hwp5html' catalog is found under locale_dir.
t = gettext.translation('hwp5html', locale_dir, fallback=True)
_ = t.gettext


# Resource paths of the XSL stylesheets that HTMLTransform hands to
# make_xsl_transform().
RESOURCE_PATH_XSL_CSS = 'xsl/hwp5css.xsl'
RESOURCE_PATH_XSL_XHTML = 'xsl/hwp5html.xsl'
|
|
|
|
|
|
|
|
class HTMLTransform(BaseTransform):
    '''Convert HWPv5 documents into XHTML/CSS output.

    Relies on ``make_transform_hwp5``, ``make_xsl_transform`` and
    ``transformed_xhwp5_at_temp``, none of which are defined in this class;
    presumably they are inherited from ``BaseTransform`` -- confirm there.
    '''

    @property
    def transform_hwp5_to_css(self):
        '''
        Callable that converts an HWPv5 file to CSS.

        Built by passing ``transform_xhwp5_to_css`` to
        ``make_transform_hwp5``.

        >>> T.transform_hwp5_to_css(hwp5file, 'styles.css')
        '''
        transform_xhwp5 = self.transform_xhwp5_to_css
        return self.make_transform_hwp5(transform_xhwp5)

    @property
    def transform_hwp5_to_xhtml(self):
        '''
        Callable that converts an HWPv5 file to XHTML.

        Built by passing ``transform_xhwp5_to_xhtml`` to
        ``make_transform_hwp5``.

        >>> T.transform_hwp5_to_xhtml(hwp5file, 'index.xhtml')
        '''
        transform_xhwp5 = self.transform_xhwp5_to_xhtml
        return self.make_transform_hwp5(transform_xhwp5)

    def transform_hwp5_to_dir(self, hwp5file, outdir):
        '''
        Convert an HWPv5 file into a directory of output files.

        Writes ``index.xhtml`` and ``styles.css`` into ``outdir`` and, when
        the document has a ``BinData`` storage, unpacks it into
        ``outdir/bindata``.  ``outdir`` must already exist.

        >>> T.transform_hwp5_to_dir(hwp5file, 'output')
        '''
        with self.transformed_xhwp5_at_temp(hwp5file) as xhwp5path:
            self.transform_xhwp5_to_dir(xhwp5path, outdir)

        bindata_dir = os.path.join(outdir, 'bindata')
        self.extract_bindata_dir(hwp5file, bindata_dir)

    @cached_property
    def transform_xhwp5_to_css(self):
        '''
        XSL transform built from ``RESOURCE_PATH_XSL_CSS``.

        >>> T.transform_xhwp5_to_css('hwp5.xml', 'styles.css')
        '''
        resource_path = RESOURCE_PATH_XSL_CSS
        return self.make_xsl_transform(resource_path)

    @cached_property
    def transform_xhwp5_to_xhtml(self):
        '''
        XSL transform built from ``RESOURCE_PATH_XSL_XHTML``.

        >>> T.transform_xhwp5_to_xhtml('hwp5.xml', 'index.xhtml')
        '''
        resource_path = RESOURCE_PATH_XSL_XHTML
        return self.make_xsl_transform(resource_path)

    def transform_xhwp5_to_dir(self, xhwp5path, outdir):
        '''
        Write ``index.xhtml`` and ``styles.css`` for ``xhwp5path`` into
        ``outdir``.

        Both files are opened in binary mode and handed to the respective
        XSL transform.

        >>> T.transform_xhwp5_to_dir('hwp5.xml', 'output')
        '''
        html_path = os.path.join(outdir, 'index.xhtml')
        with io.open(html_path, 'wb') as f:
            self.transform_xhwp5_to_xhtml(xhwp5path, f)

        css_path = os.path.join(outdir, 'styles.css')
        with io.open(css_path, 'wb') as f:
            self.transform_xhwp5_to_css(xhwp5path, f)

    def transform_hwp5_to_single(self, hwp5file, outpath):
        """
        Convert HWP file to a single HTML file with embedded CSS and images.

        The document is first converted into a temporary directory with
        ``transform_hwp5_to_dir``; the generated stylesheet is then inlined
        as a ``<style>`` element and every ``src="bindata/..."`` reference
        whose file exists is replaced by a base64 ``data:`` URI.

        :param hwp5file: opened HWPv5 file, as accepted by
            ``transform_hwp5_to_dir``
        :param outpath: filesystem path of the HTML file to write (UTF-8)
        :raises RuntimeError: if no ``index.xhtml`` was generated
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            self.transform_hwp5_to_dir(hwp5file, temp_dir)

            html_path = os.path.join(temp_dir, 'index.xhtml')
            css_path = os.path.join(temp_dir, 'styles.css')
            bindata_dir = os.path.join(temp_dir, 'bindata')

            if not os.path.exists(html_path):
                raise RuntimeError("HTML generation failed")
            with io.open(html_path, 'r', encoding='utf-8') as f:
                html_content = f.read()

            css_content = ''
            if os.path.exists(css_path):
                with io.open(css_path, 'r', encoding='utf-8') as f:
                    css_content = f.read()

            html_content = self._inline_stylesheet(html_content, css_content)

            # Images must be read before the temporary directory is removed.
            if os.path.exists(bindata_dir):
                html_content = self._inline_images(html_content, bindata_dir)

        with io.open(outpath, 'w', encoding='utf-8') as f:
            f.write(html_content)

    @staticmethod
    def _inline_stylesheet(html_content, css_content):
        '''Drop the ``styles.css`` link and embed ``css_content`` instead.'''
        # The external stylesheet never accompanies a single-file output, so
        # its <link> is removed even when there is no CSS to embed.  This
        # runs before the CSS is inserted so the CSS text itself is never
        # touched by the substitution.
        html_content = re.sub(
            r'<link[^>]+href="styles\.css"[^>]*(?:/>|>\s*</link>)',
            '',
            html_content,
        )
        if not css_content:
            return html_content
        if '</head>' not in html_content:
            logger.warning('no </head> found; stylesheet was not embedded')
            return html_content

        style_tag = '<style>\n' + css_content + '\n</style>\n'
        # count=1: only the first </head> closes the document head.
        return html_content.replace('</head>', style_tag + '</head>', 1)

    @staticmethod
    def _inline_images(html_content, bindata_dir):
        '''Replace ``src="bindata/..."`` references with ``data:`` URIs.'''

        def replace_image(match):
            # Only the file name is used, so a reference cannot point
            # outside bindata_dir.
            image_filename = os.path.basename(match.group(1))
            image_path = os.path.join(bindata_dir, image_filename)
            if not os.path.isfile(image_path):
                # Leave references to missing files untouched.
                return match.group(0)

            # Unknown file types are labelled as PNG.
            mime_type = mimetypes.guess_type(image_path)[0] or 'image/png'
            with open(image_path, 'rb') as img_f:
                b64_data = base64.b64encode(img_f.read()).decode('ascii')
            return 'src="data:{};base64,{}"'.format(mime_type, b64_data)

        return re.sub(r'src="(bindata/[^"]+)"', replace_image, html_content)

    def extract_bindata_dir(self, hwp5file, bindata_dir):
        '''Unpack the ``BinData`` storage of ``hwp5file`` into ``bindata_dir``.

        Does nothing when the file has no ``BinData`` entry.  The directory
        is created if missing (its parent must already exist).
        '''
        if 'BinData' not in hwp5file:
            return
        bindata_stg = hwp5file['BinData']
        if not os.path.exists(bindata_dir):
            os.mkdir(bindata_dir)

        # NOTE(review): absolute import, unlike the relative imports used
        # elsewhere in this module -- confirm the package name is ``hwp5``.
        from hwp5.storage import unpack
        unpack(bindata_stg, bindata_dir)
|
|
|
|
|
|
|
|
def main():
    '''Command-line entry point of ``hwp5html``.

    Parses the arguments, selects a transform from the mutually exclusive
    mode flags and runs it on the input file:

    * ``--css`` / ``--html``: write CSS or XHTML to the destination opened
      by ``make_open_dest_file(args.output)``.
    * ``--embed-image``: write a single HTML file; the path defaults to the
      input base name with a ``.html`` extension.
    * otherwise: write a directory; the path defaults to the input base
      name without extension.  An existing directory is replaced.

    Any exception raised during conversion is logged with its traceback and
    the process exits with status 1.
    '''
    from .utils import make_open_dest_file
    from .xmlmodel import Hwp5File

    argparser = main_argparser()
    args = argparser.parse_args()
    init_logger(args)

    hwp5path = args.hwp5file
    # Input file name without directory and extension; basis of the default
    # output names.
    default_stem = os.path.splitext(os.path.basename(hwp5path))[0]

    html_transform = HTMLTransform()

    open_dest = make_open_dest_file(args.output)
    if args.css:
        transform = html_transform.transform_hwp5_to_css
        open_dest = wrap_for_css(open_dest)
    elif args.html:
        transform = html_transform.transform_hwp5_to_xhtml
        open_dest = wrap_for_xml(open_dest)
    elif args.embed_image:
        transform = html_transform.transform_hwp5_to_single
        single_path = args.output or default_stem + '.html'

        # transform_hwp5_to_single opens the output itself, so the
        # destination handed to it is the path, not an open file.
        @contextmanager
        def open_dest():
            yield single_path
    else:
        transform = html_transform.transform_hwp5_to_dir
        dest_path = args.output or default_stem
        open_dest = partial(open_dir, dest_path)

    # Diagnostics go through the logger: stdout may carry the converted
    # output in the --css / --html modes.
    logger.debug('Input file: %s', hwp5path)
    logger.debug('Args: css=%s, html=%s, embed_image=%s',
                 args.css, args.html, args.embed_image)

    try:
        with closing(Hwp5File(hwp5path)) as hwp5file:
            with open_dest() as dest:
                logger.debug('Starting transformation using %s', transform)
                transform(hwp5file, dest)
                logger.debug('Transformation finished')
    except Exception as e:
        # Top-level boundary: report the failure and signal it via the
        # exit status.
        logger.exception('%s', e)
        sys.exit(1)
|
|
|
|
|
|
|
|
def main_argparser():
    '''Build the argument parser of the ``hwp5html`` command.

    Options: ``--version``, ``--loglevel``, ``--logfile``, ``--output``,
    the positional ``hwp5file`` and the mutually exclusive mode flags
    ``--css``, ``--html`` and ``--embed-image``.
    '''
    argparser = ArgumentParser(
        prog='hwp5html',
        description=_('HWPv5 to HTML converter'),
    )
    version_text = '%(prog)s {}'.format(version)
    argparser.add_argument('--version', action='version',
                           version=version_text)
    argparser.add_argument('--loglevel', help=_('Set log level.'))
    argparser.add_argument('--logfile', help=_('Set log file.'))
    argparser.add_argument('--output', help=_('Output file'))
    argparser.add_argument('hwp5file', metavar='<hwp5file>',
                           help=_('.hwp file to convert'))

    # At most one output mode may be selected.
    mode_flags = (
        ('--css', _('Generate CSS')),
        ('--html', _('Generate HTML')),
        ('--embed-image', _('Embed images and CSS into a single HTML file')),
    )
    modes = argparser.add_mutually_exclusive_group()
    for flag, description in mode_flags:
        modes.add_argument(flag, action='store_true', help=description)

    return argparser
|
|
|
|
|
|
|
|
@contextmanager
def open_dir(path):
    '''Context manager yielding ``path`` as a freshly created directory.

    Anything already present at ``path`` is removed with
    ``shutil.rmtree`` before the directory is created.
    '''
    stale = os.path.exists(path)
    if stale:
        shutil.rmtree(path)
    os.mkdir(path)
    yield path
|
|
|
|
|
|
|
|
def wrap_for_css(open_dest):
    '''Wrap ``open_dest`` with the tty helpers used for CSS output.

    Passes a pager and a ``text/css`` syntax highlighter to
    ``wrap_open_dest_for_tty`` and returns its result.
    '''
    from .utils import pager, syntaxhighlight, wrap_open_dest_for_tty

    tty_filters = [
        pager(),
        syntaxhighlight('text/css'),
    ]
    return wrap_open_dest_for_tty(open_dest, tty_filters)
|
|
|
|
|
|
|
|
def wrap_for_xml(open_dest):
    '''Wrap ``open_dest`` with the tty helpers used for XML output.

    Passes a pager, an ``application/xml`` syntax highlighter and
    ``xmllint(format=True, nonet=True)`` to ``wrap_open_dest_for_tty`` and
    returns its result.
    '''
    from .utils import (
        pager,
        syntaxhighlight,
        wrap_open_dest_for_tty,
        xmllint,
    )

    tty_filters = [
        pager(),
        syntaxhighlight('application/xml'),
        xmllint(format=True, nonet=True),
    ]
    return wrap_open_dest_for_tty(open_dest, tty_filters)
|
|
|
|
|
|
|
|
# Run the converter when this module is executed as a script.
if __name__ == '__main__':
    main()
|
|
|