| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| from __future__ import absolute_import |
| from __future__ import print_function |
| from __future__ import unicode_literals |
| from argparse import ArgumentParser |
| from contextlib import contextmanager |
| from contextlib import closing |
| from functools import partial |
| from io import BytesIO |
| import gettext |
| import io |
| import logging |
| import os.path |
| import sys |
|
|
| from . import __version__ as version |
| from .cli import init_logger |
| from .cli import init_with_environ |
| from .errors import ImplementationNotAvailable |
| from .utils import mkstemp_open |
| from .utils import hwp5_resources_path |
| from .transforms import BaseTransform |
| from .plat import get_relaxng_compile |
| from .utils import cached_property |
|
|
|
|
# Python 2/3 compatibility: Python 3 has no ``basestring``/``unicode``
# builtins, so both names are aliased to ``str`` for isinstance checks.
PY3 = sys.version_info[0] == 3
if PY3:
    basestring = unicode = str

logger = logging.getLogger(__name__)

# Message catalogs are looked up in the ``locale`` directory next to this
# module; ``fallback=True`` makes gettext return the untranslated message
# when no catalog is found instead of raising.
locale_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), 'locale')
)
t = gettext.translation('hwp5odt', locale_dir, fallback=True)
_ = t.gettext


# RELAX NG schema and XSLT stylesheet resource paths used by this module.
RESOURCE_PATH_RNG = 'odf-relaxng/OpenDocument-v1.2-os-schema.rng'
RESOURCE_PATH_XSL_SINGLE_DOCUMENT = 'xsl/odt/document.xsl'
RESOURCE_PATH_XSL_STYLE = 'xsl/odt/styles.xsl'
RESOURCE_PATH_XSL_CONTENT = 'xsl/odt/content.xsl'
|
|
|
|
class ODFValidate:
    ''' Provides a RELAX NG validator built from the OpenDocument schema.

    Validation is optional: when no RELAX NG implementation is available,
    ``relaxng_compile`` is ``None`` and ``odf_validator`` evaluates to
    ``None`` as well.
    '''

    def __init__(self, relaxng_compile=None):
        '''
        >>> V = ODFValidate()

        :param relaxng_compile: callable compiling a RELAX NG schema path
            into a validator; when omitted, the platform default is used
            if one is available.
        '''
        compiler = relaxng_compile
        if compiler is None:
            try:
                compiler = self.get_default_relaxng_compile()
            except ImplementationNotAvailable:
                # No implementation on this platform: leave it as None so
                # that validation is simply skipped.
                pass
        self.relaxng_compile = compiler

    @classmethod
    def get_default_relaxng_compile(cls):
        ''' Return the platform's RELAX NG compile function.

        :raises ImplementationNotAvailable: if ``get_relaxng_compile()``
            returns a false value.
        '''
        compiler = get_relaxng_compile()
        if compiler:
            return compiler
        raise ImplementationNotAvailable('relaxng')

    @cached_property
    def odf_validator(self):
        '''
        >>> with V.odf_validator(sys.stdout) as output:
        ...     output.write(xml)
        '''
        return self.make_odf_validator()

    def make_odf_validator(self):
        ''' Compile the ODF schema into a validator.

        :returns: the result of ``relaxng_compile`` applied to the schema
            path, or ``None`` when no compiler is configured.
        '''
        if not self.relaxng_compile:
            return None
        with hwp5_resources_path(RESOURCE_PATH_RNG) as schema_path:
            return self.relaxng_compile(schema_path)
|
|
|
|
class ODTTransform(BaseTransform, ODFValidate):
    ''' Transforms HWPv5 documents into OpenDocument Text output.

    Combines the XSLT facilities inherited from ``BaseTransform`` with the
    optional schema validation set up by ``ODFValidate``.
    '''

    def __init__(self, xslt_compile=None, relaxng_compile=None,
                 embedbin=False):
        '''
        >>> from hwp5.hwp5odt import ODTTransform
        >>> T = ODTTransform()

        :param xslt_compile: forwarded to ``BaseTransform.__init__``.
        :param relaxng_compile: forwarded to ``ODFValidate.__init__``.
        :param embedbin: forwarded to ``BaseTransform.__init__``.
        '''
        BaseTransform.__init__(self, xslt_compile=xslt_compile,
                               embedbin=embedbin)
        ODFValidate.__init__(self, relaxng_compile)

    @property
    def transform_hwp5_to_styles(self):
        '''
        >>> with io.open('styles.xml', 'wb') as f:
        ...     T.transform_hwp5_to_styles(hwp5file, f)
        '''
        transform_xhwp5 = self.transform_xhwp5_to_styles
        return self.make_transform_hwp5(transform_xhwp5)

    @property
    def transform_hwp5_to_content(self):
        '''
        >>> with io.open('content.xml', 'wb') as f:
        ...     T.transform_hwp5_to_content(hwp5file, f)
        '''
        transform_xhwp5 = self.transform_xhwp5_to_content
        return self.make_transform_hwp5(transform_xhwp5)

    @property
    def transform_hwp5_to_single_document(self):
        '''
        >>> with io.open('transformed.fodt', 'wb') as f:
        ...     T.transform_hwp5_to_single_document(hwp5file, f)
        '''
        transform_xhwp5 = self.transform_xhwp5_to_single_document
        return self.make_transform_hwp5(transform_xhwp5)

    def transform_hwp5_to_package(self, hwp5file, odtpkg):
        '''
        >>> with open_odtpkg('transformed.odt') as odtpkg:
        ...     T.transform_hwp5_to_package(hwp5file, odtpkg)

        Writes ``styles.xml``, ``content.xml`` and ``manifest.rdf`` into
        ``odtpkg`` and then copies every ``BinData`` stream of ``hwp5file``
        under ``bindata/``.
        '''
        with self.transformed_xhwp5_at_temp(hwp5file) as xml_path:
            self.transform_xhwp5_into_package(xml_path, odtpkg)

        if 'BinData' in hwp5file:
            bindata = hwp5file['BinData']
            for name in bindata:
                stream = bindata[name].open()
                try:
                    odtpkg.insert_stream(stream, 'bindata/' + name,
                                         'application/octet-stream')
                finally:
                    # Release the stream even when the insertion fails.
                    # close() is looked up defensively because the stream
                    # type is not fixed by this module.
                    close = getattr(stream, 'close', None)
                    if close is not None:
                        close()

    @cached_property
    def transform_xhwp5_to_styles(self):
        '''
        >>> with io.open('styles.xml', 'wb') as f:
        ...     T.transform_xhwp5_to_styles('input.xml', f)
        '''
        resource_path = RESOURCE_PATH_XSL_STYLE
        return self.make_odf_transform(resource_path)

    @cached_property
    def transform_xhwp5_to_content(self):
        '''
        >>> with io.open('content.xml', 'wb') as f:
        ...     T.transform_xhwp5_to_content('input.xml', f)
        '''
        resource_path = RESOURCE_PATH_XSL_CONTENT
        return self.make_odf_transform(resource_path)

    @cached_property
    def transform_xhwp5_to_single_document(self):
        '''
        >>> with io.open('transformed.fodt', 'wb') as f:
        ...     T.transform_xhwp5_to_single_document('input.xml', f)
        '''
        resource_path = RESOURCE_PATH_XSL_SINGLE_DOCUMENT
        return self.make_odf_transform(resource_path)

    @property
    def transform_xhwp5_into_package(self):
        '''
        >>> with open_odtpkg('transformed.odt') as odtpkg:
        ...     T.transform_xhwp5_into_package('input.xml', odtpkg)
        '''
        def transform(xhwp5path, odtpkg):
            with self.transformed_styles_at_temp(xhwp5path) as path:
                odtpkg.insert_path(path, 'styles.xml', 'text/xml')
            with self.transformed_content_at_temp(xhwp5path) as path:
                odtpkg.insert_path(path, 'content.xml', 'text/xml')

            rdf = BytesIO()
            manifest_rdf(rdf)
            # Rewind so that insert_stream() reads from the beginning.
            rdf.seek(0)
            odtpkg.insert_stream(rdf, 'manifest.rdf',
                                 'application/rdf+xml')
        return transform

    def transformed_styles_at_temp(self, xhwp5path):
        '''
        >>> with T.transformed_styles_at_temp('input.xml') as styles_path:
        ...     pass
        '''
        transform_xhwp5 = self.transform_xhwp5_to_styles
        return transformed_at_temp_path(xhwp5path, transform_xhwp5)

    def transformed_content_at_temp(self, xhwp5path):
        '''
        >>> with T.transformed_content_at_temp('input.xml') as content_path:
        ...     pass
        '''
        transform_xhwp5 = self.transform_xhwp5_to_content
        return transformed_at_temp_path(xhwp5path, transform_xhwp5)

    def transformed_single_document_at_temp(self, xhwp5path):
        '''
        >>> with T.transformed_single_document_at_temp('input.xml') as path:
        ...     pass
        '''
        transform_xhwp5 = self.transform_xhwp5_to_single_document
        return transformed_at_temp_path(xhwp5path, transform_xhwp5)

    def make_odf_transform(self, resource_path):
        ''' Build a transform from the XSL resource at ``resource_path``.

        When an ODF validator is available, the returned callable routes
        its output through ``validator.validating_output``; otherwise the
        plain XSL transform is returned.
        '''
        transform = self.make_xsl_transform(resource_path)
        validator = self.odf_validator
        if validator:
            def validating_transform(input, output):
                with validator.validating_output(output) as output:
                    transform(input, output)
            return validating_transform
        else:
            return transform
|
|
|
|
@contextmanager
def transformed_at_temp_path(inp_path, transform):
    ''' Run ``transform`` into a temporary file and yield that file's path.

    :param inp_path: passed to ``transform`` as its input argument.
    :param transform: callable taking ``(input, output_file)``.

    The path is yielded while the ``mkstemp_open()`` context is still
    active.
    '''
    with mkstemp_open() as opened:
        temp_path, temp_file = opened
        transform(inp_path, temp_file)
        # Push buffered output out before handing the path to the caller.
        temp_file.flush()
        yield temp_path
|
|
|
|
class ODTPackage(object):
    ''' Writer for an OpenDocument Text zip package.

    Entries added through ``insert_path``/``insert_stream`` are recorded in
    ``self.files`` so that ``close()`` can emit ``META-INF/manifest.xml``.
    '''

    def __init__(self, path_or_zipfile):
        ''' Open the package for writing.

        :param path_or_zipfile: a filesystem path (a new zip file is
            created there) or an already opened, writable zip file object.
        '''
        from zipfile import ZIP_STORED

        self.files = []

        if isinstance(path_or_zipfile, basestring):
            from zipfile import ZipFile
            zipfile = ZipFile(path_or_zipfile, 'w')
        else:
            zipfile = path_or_zipfile
        self.zf = zipfile

        # The OpenDocument package format requires the 'mimetype' file to be
        # the first entry of the zip file and to be stored uncompressed, so
        # it is written here, before any content can be inserted.
        self.zf.writestr('mimetype',
                         b'application/vnd.oasis.opendocument.text',
                         compress_type=ZIP_STORED)

    def insert_path(self, src_path, path, media_type):
        ''' Add the file at ``src_path`` to the package as ``path``. '''
        with io.open(src_path, 'rb') as f:
            self.insert_stream(f, path, media_type)

    def insert_stream(self, f, path, media_type):
        ''' Add the whole content of file-like ``f`` as ``path``.

        The entry is recorded for the manifest with ``media_type``.
        '''
        if not isinstance(path, unicode):
            path = path.decode('utf-8')
        self.zf.writestr(path, f.read())
        self.files.append(dict(full_path=path, media_type=media_type))

    def close(self):
        ''' Write ``META-INF/manifest.xml`` and close the zip file.

        The zip file is closed even if writing the manifest fails.
        '''
        try:
            manifest = BytesIO()
            manifest_xml(manifest, self.files)
            self.zf.writestr('META-INF/manifest.xml', manifest.getvalue())
        finally:
            self.zf.close()
|
|
|
|
def manifest_xml(f, files):
    ''' Write an ODF ``manifest.xml`` document to ``f``.

    :param f: writable binary file-like object.
    :param files: iterable of entries convertible with ``dict()``; each
        must have a ``full_path`` key and may have a ``media_type`` key
        (``application/octet-stream`` is used when it is missing).

    A root entry ``/`` for the text document itself is always emitted
    before the listed files.
    '''
    from xml.sax.saxutils import XMLGenerator

    ns_uri = 'urn:oasis:names:tc:opendocument:xmlns:manifest:1.0'
    ns_prefix = 'manifest'

    gen = XMLGenerator(f, 'utf-8')

    def qualified(local):
        return ns_prefix + ':' + local

    def open_tag(local, attributes):
        # Every attribute lives in the manifest namespace too.
        ns_attributes = {}
        for key, value in attributes.items():
            ns_attributes[(ns_uri, key)] = value
        gen.startElementNS((ns_uri, local), qualified(local), ns_attributes)

    def close_tag(local):
        gen.endElementNS((ns_uri, local), qualified(local))

    def emit_file_entry(full_path, media_type, **extra):
        attributes = {'media-type': media_type, 'full-path': full_path}
        # Keyword names use '_' where the XML attribute names use '-'.
        for key, value in extra.items():
            attributes[key.replace('_', '-')] = value
        open_tag('file-entry', attributes)
        close_tag('file-entry')

    gen.startDocument()
    gen.startPrefixMapping(ns_prefix, ns_uri)

    open_tag('manifest', {'version': '1.2'})
    emit_file_entry('/', 'application/vnd.oasis.opendocument.text',
                    version='1.2')
    for entry in files:
        fields = dict(entry)
        emit_file_entry(fields['full_path'],
                        fields.get('media_type', 'application/octet-stream'))
    close_tag('manifest')

    gen.endPrefixMapping(ns_prefix)
    gen.endDocument()
|
|
|
|
def manifest_rdf(f):
    ''' Write the fixed ``manifest.rdf`` metadata document to ``f``.

    :param f: writable binary file-like object.

    The document declares ``content.xml`` and ``styles.xml`` as parts of
    the package.
    '''
    rdf_document = b'''<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:pkg="http://docs.oasis-open.org/ns/office/1.2/meta/pkg#"
xmlns:odf="http://docs.oasis-open.org/ns/office/1.2/meta/odf#">
<pkg:Document rdf:about="">
<pkg:hasPart rdf:resource="content.xml"/>
<pkg:hasPart rdf:resource="styles.xml"/>
</pkg:Document>
<odf:ContentFile rdf:about="content.xml"/>
<odf:StylesFile rdf:about="styles.xml"/>
</rdf:RDF>'''
    f.write(rdf_document)
|
|
|
|
def main():
    ''' Entry point of the ``hwp5odt`` command line tool.

    Parses ``sys.argv``, selects the transform matching the requested
    output kind (``--document``, ``--styles``, ``--content`` or, by
    default, a complete .odt package) and runs it on the given HWPv5 file.

    Exits with status 1 when the input cannot be parsed or is not a valid
    HWPv5 file.
    '''
    from .dataio import ParseError
    from .errors import InvalidHwp5FileError
    from .utils import make_open_dest_file
    from .xmlmodel import Hwp5File

    argparser = main_argparser()
    args = argparser.parse_args()
    init_logger(args)

    init_with_environ()

    hwp5path = args.hwp5file

    odt_transform = ODTTransform()

    open_dest = make_open_dest_file(args.output)
    if args.document:
        # A single flat document embeds images unless explicitly disabled.
        odt_transform.embedbin = not args.no_embed_image
        transform = odt_transform.transform_hwp5_to_single_document
        open_dest = wrap_for_xml(open_dest)
    elif args.styles:
        odt_transform.embedbin = args.embed_image
        transform = odt_transform.transform_hwp5_to_styles
        open_dest = wrap_for_xml(open_dest)
    elif args.content:
        odt_transform.embedbin = args.embed_image
        transform = odt_transform.transform_hwp5_to_content
        open_dest = wrap_for_xml(open_dest)
    else:
        odt_transform.embedbin = args.embed_image
        transform = odt_transform.transform_hwp5_to_package
        # Without --output, the package is named after the input file.
        dest_path = args.output
        dest_path = dest_path or replace_ext(hwp5path, '.odt')
        open_dest = partial(open_odtpkg, dest_path)

    try:
        with closing(Hwp5File(hwp5path)) as hwp5file:
            with open_dest() as dest:
                transform(hwp5file, dest)
    except ParseError as e:
        e.print_to_logger(logger)
        # A failed conversion must not report success to the caller.
        sys.exit(1)
    except InvalidHwp5FileError as e:
        logger.error('%s', e)
        sys.exit(1)
|
|
|
|
def main_argparser():
    ''' Build the argument parser of the ``hwp5odt`` command.

    :returns: an ``ArgumentParser`` accepting the input file, the logging
        and output options, one of the mutually exclusive generator flags
        (``--styles``, ``--content``, ``--document``) and one of the
        mutually exclusive image embedding flags.
    '''
    argparser = ArgumentParser(prog='hwp5odt',
                               description=_('HWPv5 to odt converter'))
    argparser.add_argument('--version', action='version',
                           version='%(prog)s {}'.format(version))

    # Plain options taking a value.
    valued_options = (
        ('--loglevel', _('Set log level.')),
        ('--logfile', _('Set log file.')),
        ('--output', _('Output file')),
    )
    for flag, help_text in valued_options:
        argparser.add_argument(flag, help=help_text)

    argparser.add_argument('hwp5file', metavar='<hwp5file>',
                           help=_('.hwp file to convert'))

    # Each tuple of flags becomes one mutually exclusive group.
    exclusive_flag_groups = (
        (
            ('--styles', _('Generate styles.xml')),
            ('--content', _('Generate content.xml')),
            ('--document', _('Generate .fodt')),
        ),
        (
            ('--embed-image', _('Embed images in output xml.')),
            ('--no-embed-image', _('Do not embed images in output xml.')),
        ),
    )
    for flags in exclusive_flag_groups:
        group = argparser.add_mutually_exclusive_group()
        for flag, help_text in flags:
            group.add_argument(flag, action='store_true', help=help_text)

    return argparser
|
|
|
|
def replace_ext(path, ext):
    ''' Return the file name of ``path`` with its extension set to ``ext``.

    The directory part of ``path`` is dropped; only the last extension of
    the file name is replaced.

    >>> replace_ext('/some/dir/sample.hwp', '.odt')
    'sample.odt'
    '''
    stem, _old_ext = os.path.splitext(os.path.basename(path))
    return stem + ext
|
|
|
|
@contextmanager
def open_odtpkg(path):
    ''' Context manager yielding an ``ODTPackage`` written to ``path``.

    The package is closed when the ``with`` block is left, whether or not
    an exception was raised inside it.
    '''
    package = ODTPackage(path)
    try:
        yield package
    finally:
        package.close()
|
|
|
|
def wrap_for_xml(open_dest):
    ''' Wrap ``open_dest`` with the XML output helpers from ``.utils``.

    :param open_dest: destination opener to be wrapped.
    :returns: the result of ``wrap_open_dest_for_tty`` applied to
        ``open_dest`` with a pager, an ``application/xml`` syntax
        highlighter and a formatting ``xmllint``, in that order.
    '''
    from .utils import wrap_open_dest_for_tty
    from .utils import pager
    from .utils import syntaxhighlight
    from .utils import xmllint

    # NOTE(review): presumably these only take effect when the output is a
    # terminal -- confirm against wrap_open_dest_for_tty.
    tty_filters = [
        pager(),
        syntaxhighlight('application/xml'),
        xmllint(format=True),
    ]
    return wrap_open_dest_for_tty(open_dest, tty_filters)
|
|