| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | from __future__ import absolute_import |
| | from __future__ import print_function |
| | from __future__ import unicode_literals |
| | from argparse import ArgumentParser |
| | from contextlib import contextmanager |
| | from contextlib import closing |
| | from functools import partial |
| | from io import BytesIO |
| | import gettext |
| | import io |
| | import logging |
| | import os.path |
| | import sys |
| |
|
| | from . import __version__ as version |
| | from .cli import init_logger |
| | from .cli import init_with_environ |
| | from .errors import ImplementationNotAvailable |
| | from .utils import mkstemp_open |
| | from .utils import hwp5_resources_path |
| | from .transforms import BaseTransform |
| | from .plat import get_relaxng_compile |
| | from .utils import cached_property |
| |
|
| |
|
| | PY3 = sys.version_info.major == 3 |
| | if PY3: |
| | basestring = str |
| | unicode = str |
| | logger = logging.getLogger(__name__) |
| | locale_dir = os.path.join(os.path.dirname(__file__), 'locale') |
| | locale_dir = os.path.abspath(locale_dir) |
| | t = gettext.translation('hwp5odt', locale_dir, fallback=True) |
| | _ = t.gettext |
| |
|
| |
|
| | RESOURCE_PATH_RNG = 'odf-relaxng/OpenDocument-v1.2-os-schema.rng' |
| | RESOURCE_PATH_XSL_SINGLE_DOCUMENT = 'xsl/odt/document.xsl' |
| | RESOURCE_PATH_XSL_STYLE = 'xsl/odt/styles.xsl' |
| | RESOURCE_PATH_XSL_CONTENT = 'xsl/odt/content.xsl' |
| |
|
| |
|
| | class ODFValidate: |
| |
|
| | def __init__(self, relaxng_compile=None): |
| | ''' |
| | >>> V = ODFValidate() |
| | ''' |
| | if relaxng_compile is None: |
| | try: |
| | relaxng_compile = self.get_default_relaxng_compile() |
| | except ImplementationNotAvailable: |
| | relaxng_compile = None |
| | self.relaxng_compile = relaxng_compile |
| |
|
| | @classmethod |
| | def get_default_relaxng_compile(cls): |
| | relaxng_compile = get_relaxng_compile() |
| | if not relaxng_compile: |
| | raise ImplementationNotAvailable('relaxng') |
| | return relaxng_compile |
| |
|
| | @cached_property |
| | def odf_validator(self): |
| | ''' |
| | >>> with V.odf_validator(sys.stdout) as output: |
| | ... output.write(xml) |
| | ''' |
| | return self.make_odf_validator() |
| |
|
| | def make_odf_validator(self): |
| | if self.relaxng_compile: |
| | with hwp5_resources_path(RESOURCE_PATH_RNG) as rng_path: |
| | return self.relaxng_compile(rng_path) |
| |
|
| |
|
| | class ODTTransform(BaseTransform, ODFValidate): |
| |
|
| | def __init__(self, xslt_compile=None, relaxng_compile=None, |
| | embedbin=False): |
| | ''' |
| | >>> from hwp5.hwp5odt import ODTTransform |
| | >>> T = ODTTransform() |
| | ''' |
| | BaseTransform.__init__(self, xslt_compile=xslt_compile, |
| | embedbin=embedbin) |
| | ODFValidate.__init__(self, relaxng_compile) |
| |
|
| | @property |
| | def transform_hwp5_to_styles(self): |
| | ''' |
| | >>> with io.open('styles.xml', 'wb') as f: |
| | ... T.transform_hwp5_to_styles(hwp5file, f) |
| | ''' |
| | transform_xhwp5 = self.transform_xhwp5_to_styles |
| | return self.make_transform_hwp5(transform_xhwp5) |
| |
|
| | @property |
| | def transform_hwp5_to_content(self): |
| | ''' |
| | >>> with io.open('content.xml', 'wb') as f: |
| | ... T.transform_hwp5_to_content(hwp5file, f) |
| | ''' |
| | transform_xhwp5 = self.transform_xhwp5_to_content |
| | return self.make_transform_hwp5(transform_xhwp5) |
| |
|
| | @property |
| | def transform_hwp5_to_single_document(self): |
| | ''' |
| | >>> with io.open('transformed.fodt', 'wb') as f: |
| | ... T.transform_hwp5_to_single_document(hwp5file, f) |
| | ''' |
| | transform_xhwp5 = self.transform_xhwp5_to_single_document |
| | return self.make_transform_hwp5(transform_xhwp5) |
| |
|
| | def transform_hwp5_to_package(self, hwp5file, odtpkg): |
| | ''' |
| | >>> with open_odtpkg('transformed.odt') as odtpkg: |
| | ... T.transform_hwp5_to_package(hwp5file, odtpkg) |
| | ''' |
| | with self.transformed_xhwp5_at_temp(hwp5file) as xml_path: |
| | self.transform_xhwp5_into_package(xml_path, odtpkg) |
| |
|
| | if 'BinData' in hwp5file: |
| | bindata = hwp5file['BinData'] |
| | for name in bindata: |
| | f = bindata[name].open() |
| | path = 'bindata/' + name |
| | mimetype = 'application/octet-stream' |
| | odtpkg.insert_stream(f, path, mimetype) |
| |
|
| | @cached_property |
| | def transform_xhwp5_to_styles(self): |
| | ''' |
| | >>> with io.open('styles.xml', 'wb') as f: |
| | ... T.transform_xhwp5_to_styles('input.xml', f) |
| | ''' |
| | resource_path = RESOURCE_PATH_XSL_STYLE |
| | return self.make_odf_transform(resource_path) |
| |
|
| | @cached_property |
| | def transform_xhwp5_to_content(self): |
| | ''' |
| | >>> with io.open('content.xml', 'wb') as f: |
| | ... T.transform_xhwp5_to_content('input.xml', f) |
| | ''' |
| | resource_path = RESOURCE_PATH_XSL_CONTENT |
| | return self.make_odf_transform(resource_path) |
| |
|
| | @cached_property |
| | def transform_xhwp5_to_single_document(self): |
| | ''' |
| | >>> with io.open('transformed.fodf', 'wb') as f: |
| | ... T.transform_xhwp5_to_single_document('input.xml', f) |
| | ''' |
| | resource_path = RESOURCE_PATH_XSL_SINGLE_DOCUMENT |
| | return self.make_odf_transform(resource_path) |
| |
|
| | @property |
| | def transform_xhwp5_into_package(self): |
| | ''' |
| | >>> with open_odtpkg('transformed.odt') as odtpkg: |
| | >>> T.transform_xhwp5_into_package('input.xml', odtpkg) |
| | ''' |
| | def transform(xhwp5path, odtpkg): |
| | with self.transformed_styles_at_temp(xhwp5path) as path: |
| | odtpkg.insert_path(path, 'styles.xml', 'text/xml') |
| | with self.transformed_content_at_temp(xhwp5path) as path: |
| | odtpkg.insert_path(path, 'content.xml', 'text/xml') |
| |
|
| | rdf = BytesIO() |
| | manifest_rdf(rdf) |
| | rdf.seek(0) |
| | odtpkg.insert_stream(rdf, 'manifest.rdf', |
| | 'application/rdf+xml') |
| | return transform |
| |
|
| | def transformed_styles_at_temp(self, xhwp5path): |
| | ''' |
| | >>> with T.transformed_styles_at_temp('input.xml') as styles_path: |
| | ... pass |
| | ''' |
| | transform_xhwp5 = self.transform_xhwp5_to_styles |
| | return transformed_at_temp_path(xhwp5path, transform_xhwp5) |
| |
|
| | def transformed_content_at_temp(self, xhwp5path): |
| | ''' |
| | >>> with T.transformed_content_at_temp('input.xml') as content_path: |
| | ... pass |
| | ''' |
| | transform_xhwp5 = self.transform_xhwp5_to_content |
| | return transformed_at_temp_path(xhwp5path, transform_xhwp5) |
| |
|
| | def transformed_single_document_at_temp(self, xhwp5path): |
| | ''' |
| | >>> with T.transformed_single_document_at_temp('input.xml') as path: |
| | ... pass |
| | ''' |
| | transform_xhwp5 = self.transform_xhwp5_to_single_document |
| | return transformed_at_temp_path(xhwp5path, transform_xhwp5) |
| |
|
| | def make_odf_transform(self, resource_path): |
| | transform = self.make_xsl_transform(resource_path) |
| | validator = self.odf_validator |
| | if validator: |
| | def validating_transform(input, output): |
| | with validator.validating_output(output) as output: |
| | transform(input, output) |
| | return validating_transform |
| | else: |
| | return transform |
| |
|
| |
|
| | @contextmanager |
| | def transformed_at_temp_path(inp_path, transform): |
| | with mkstemp_open() as (tmp_path, f): |
| | transform(inp_path, f) |
| | f.flush() |
| | yield tmp_path |
| |
|
| |
|
| | class ODTPackage(object): |
| | def __init__(self, path_or_zipfile): |
| | self.files = [] |
| |
|
| | if isinstance(path_or_zipfile, basestring): |
| | from zipfile import ZipFile |
| | zipfile = ZipFile(path_or_zipfile, 'w') |
| | else: |
| | zipfile = path_or_zipfile |
| | self.zf = zipfile |
| |
|
| | def insert_path(self, src_path, path, media_type): |
| | with io.open(src_path, 'rb') as f: |
| | self.insert_stream(f, path, media_type) |
| |
|
| | def insert_stream(self, f, path, media_type): |
| | if not isinstance(path, unicode): |
| | path = path.decode('utf-8') |
| | self.zf.writestr(path, f.read()) |
| | self.files.append(dict(full_path=path, media_type=media_type)) |
| |
|
| | def close(self): |
| |
|
| | manifest = BytesIO() |
| | manifest_xml(manifest, self.files) |
| | manifest.seek(0) |
| | self.zf.writestr('META-INF/manifest.xml', manifest.getvalue()) |
| | self.zf.writestr('mimetype', 'application/vnd.oasis.opendocument.text') |
| |
|
| | self.zf.close() |
| |
|
| |
|
| | def manifest_xml(f, files): |
| | from xml.sax.saxutils import XMLGenerator |
| | xml = XMLGenerator(f, 'utf-8') |
| | xml.startDocument() |
| |
|
| | uri = 'urn:oasis:names:tc:opendocument:xmlns:manifest:1.0' |
| | prefix = 'manifest' |
| | xml.startPrefixMapping(prefix, uri) |
| |
|
| | def startElement(name, attrs): |
| | attrs = dict(((uri, n), v) for n, v in attrs.items()) |
| | xml.startElementNS((uri, name), prefix + ':' + name, attrs) |
| |
|
| | def endElement(name): |
| | xml.endElementNS((uri, name), prefix + ':' + name) |
| |
|
| | def file_entry(full_path, media_type, **kwargs): |
| | attrs = {'media-type': media_type, 'full-path': full_path} |
| | attrs.update(dict((n.replace('_', '-'), v) |
| | for n, v in kwargs.items())) |
| | startElement('file-entry', attrs) |
| | endElement('file-entry') |
| |
|
| | startElement('manifest', dict(version='1.2')) |
| | file_entry('/', 'application/vnd.oasis.opendocument.text', version='1.2') |
| | for e in files: |
| | e = dict(e) |
| | full_path = e.pop('full_path') |
| | media_type = e.pop('media_type', 'application/octet-stream') |
| | file_entry(full_path, media_type) |
| | endElement('manifest') |
| |
|
| | xml.endPrefixMapping(prefix) |
| | xml.endDocument() |
| |
|
| |
|
| | def manifest_rdf(f): |
| | f.write(b'''<?xml version="1.0" encoding="utf-8"?> |
| | <rdf:RDF |
| | xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" |
| | xmlns:pkg="http://docs.oasis-open.org/ns/office/1.2/meta/pkg#" |
| | xmlns:odf="http://docs.oasis-open.org/ns/office/1.2/meta/odf#"> |
| | <pkg:Document rdf:about=""> |
| | <pkg:hasPart rdf:resource="content.xml"/> |
| | <pkg:hasPart rdf:resource="styles.xml"/> |
| | </pkg:Document> |
| | <odf:ContentFile rdf:about="content.xml"/> |
| | <odf:StylesFile rdf:about="styles.xml"/> |
| | </rdf:RDF>''') |
| |
|
| |
|
| | def main(): |
| | from .dataio import ParseError |
| | from .errors import InvalidHwp5FileError |
| | from .utils import make_open_dest_file |
| | from .xmlmodel import Hwp5File |
| |
|
| | argparser = main_argparser() |
| | args = argparser.parse_args() |
| | init_logger(args) |
| |
|
| | init_with_environ() |
| |
|
| | hwp5path = args.hwp5file |
| |
|
| | odt_transform = ODTTransform() |
| |
|
| | open_dest = make_open_dest_file(args.output) |
| | if args.document: |
| | odt_transform.embedbin = not args.no_embed_image |
| | transform = odt_transform.transform_hwp5_to_single_document |
| | open_dest = wrap_for_xml(open_dest) |
| | elif args.styles: |
| | odt_transform.embedbin = args.embed_image |
| | transform = odt_transform.transform_hwp5_to_styles |
| | open_dest = wrap_for_xml(open_dest) |
| | elif args.content: |
| | odt_transform.embedbin = args.embed_image |
| | transform = odt_transform.transform_hwp5_to_content |
| | open_dest = wrap_for_xml(open_dest) |
| | else: |
| | odt_transform.embedbin = args.embed_image |
| | transform = odt_transform.transform_hwp5_to_package |
| | dest_path = args.output |
| | dest_path = dest_path or replace_ext(hwp5path, '.odt') |
| | open_dest = partial(open_odtpkg, dest_path) |
| |
|
| | try: |
| | with closing(Hwp5File(hwp5path)) as hwp5file: |
| | with open_dest() as dest: |
| | transform(hwp5file, dest) |
| | except ParseError as e: |
| | e.print_to_logger(logger) |
| | except InvalidHwp5FileError as e: |
| | logger.error('%s', e) |
| | sys.exit(1) |
| |
|
| |
|
| | def main_argparser(): |
| | parser = ArgumentParser( |
| | prog='hwp5odt', |
| | description=_('HWPv5 to odt converter'), |
| | ) |
| | parser.add_argument( |
| | '--version', |
| | action='version', |
| | version='%(prog)s {}'.format(version) |
| | ) |
| | parser.add_argument( |
| | '--loglevel', |
| | help=_('Set log level.'), |
| | ) |
| | parser.add_argument( |
| | '--logfile', |
| | help=_('Set log file.'), |
| | ) |
| | parser.add_argument( |
| | '--output', |
| | help=_('Output file'), |
| | ) |
| | parser.add_argument( |
| | 'hwp5file', |
| | metavar='<hwp5file>', |
| | help=_('.hwp file to convert'), |
| | ) |
| | generator_group = parser.add_mutually_exclusive_group() |
| | generator_group.add_argument( |
| | '--styles', |
| | action='store_true', |
| | help=_('Generate styles.xml'), |
| | ) |
| | generator_group.add_argument( |
| | '--content', |
| | action='store_true', |
| | help=_('Generate content.xml'), |
| | ) |
| | generator_group.add_argument( |
| | '--document', |
| | action='store_true', |
| | help=_('Generate .fodt'), |
| | ) |
| | embedimage = parser.add_mutually_exclusive_group() |
| | embedimage.add_argument( |
| | '--embed-image', |
| | action='store_true', |
| | help=_('Embed images in output xml.'), |
| | ) |
| | embedimage.add_argument( |
| | '--no-embed-image', |
| | action='store_true', |
| | help=_('Do not embed images in output xml.'), |
| | ) |
| | return parser |
| |
|
| |
|
| | def replace_ext(path, ext): |
| | name = os.path.basename(path) |
| | root = os.path.splitext(name)[0] |
| | return root + ext |
| |
|
| |
|
| | @contextmanager |
| | def open_odtpkg(path): |
| | odtpkg = ODTPackage(path) |
| | with closing(odtpkg): |
| | yield odtpkg |
| |
|
| |
|
| | def wrap_for_xml(open_dest): |
| | from .utils import wrap_open_dest_for_tty |
| | from .utils import pager |
| | from .utils import syntaxhighlight |
| | from .utils import xmllint |
| | return wrap_open_dest_for_tty(open_dest, [ |
| | pager(), |
| | syntaxhighlight('application/xml'), |
| | xmllint(format=True), |
| | ]) |
| |
|