# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2010-2023 mete0r <https://github.com/mete0r>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
from contextlib import contextmanager
import io
import logging
import os.path
import shutil
import sys
import tempfile
from ..errors import ValidationFailed
# True when the running interpreter's major version is 3.
PY3 = sys.version_info.major == 3
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def is_enabled():
    ''' Report whether this lxml-based implementation can be used.

    :returns: True if ``lxml.etree`` can be imported, False otherwise
    '''
    try:
        from lxml import etree  # noqa
    except ImportError:
        available = False
    else:
        available = True
    return available
def xslt(xsl_path, inp_path, out_path):
    ''' Transform an XML file with an XSL stylesheet.

    The stylesheet is compiled first; only then is the output file opened
    (in binary write mode) and the transformed document written into it.

    :param xsl_path: stylesheet path
    :param inp_path: input path
    :param out_path: output path
    :returns: the value returned by the compiled transform function
    '''
    apply_stylesheet = xslt_compile(xsl_path)
    with io.open(out_path, 'wb') as out_stream:
        outcome = apply_stylesheet(inp_path, out_stream)
    return outcome
def xslt_compile(xsl_path, **params):
    ''' Compile an XSL stylesheet into a transform function.

    :param xsl_path: stylesheet path
    :param params: stylesheet parameters, forwarded to :class:`XSLT`
    :returns: the bound ``transform_into_stream`` method of the compiled
        :class:`XSLT`, callable as ``f(input_path, output_stream)``
    '''
    return XSLT(xsl_path, **params).transform_into_stream
class XSLT:
    ''' An XSL stylesheet compiled with lxml, plus its string parameters. '''

    def __init__(self, xsl_path, **params):
        ''' Compile the XSL stylesheet found at ``xsl_path``.

        :param xsl_path: stylesheet path
        :param params: stylesheet parameters; each value is wrapped with
            ``etree.XSLT.strparam`` and passed on every transformation
        '''
        from lxml import etree

        with io.open(xsl_path, 'rb') as stylesheet_file:
            stylesheet_doc = etree.parse(stylesheet_file)

        self.xsl_path = xsl_path
        self.etree_xslt = etree.XSLT(stylesheet_doc)
        self.params = {
            key: etree.XSLT.strparam(val)
            for key, val in params.items()
        }

    def transform(self, input, output):
        ''' Transform the file at path ``input`` into the file at ``output``.

        >>> T.transform('input.xml', 'output.xml')

        :param input: input file path
        :param output: output file path (opened in binary write mode)
        :returns: the value returned by :meth:`_transform`
        '''
        with io.open(input, 'rb') as src, io.open(output, 'wb') as dst:
            return self._transform(src, dst)

    def transform_into_stream(self, input, output):
        ''' Transform the file at path ``input`` into an open stream.

        >>> T.transform_into_stream('input.xml', sys.stdout)

        :param input: input file path
        :param output: writable stream receiving the result
        :returns: the value returned by :meth:`_transform`
        '''
        with io.open(input, 'rb') as src:
            return self._transform(src, output)

    def _transform(self, input, output):
        ''' Parse ``input``, apply the stylesheet and write to ``output``.

        :param input: binary stream to read the source document from
        :param output: binary stream to write the result to
        :returns: an empty dict
        '''
        from lxml import etree

        source_doc = etree.parse(input)
        xsl_name = os.path.basename(self.xsl_path)

        logger.info('_lxml.xslt(%s) start', xsl_name)
        transformed = self.etree_xslt(source_doc, **self.params)
        logger.info('_lxml.xslt(%s) end', xsl_name)

        # Convert the XSLT result itself rather than calling its write();
        # see
        # https://lxml.de/1.3/FAQ.html#what-is-the-difference-between-str-xslt-doc-and-xslt-doc-write
        output.write(bytes(transformed))
        return {}
def relaxng(rng_path, inp_path):
    ''' Validate an XML file against a RelaxNG schema.

    :param rng_path: RelaxNG path
    :param inp_path: input path
    :returns: the result of :meth:`RelaxNG.validate` for ``inp_path``
    '''
    return RelaxNG(rng_path).validate(inp_path)
def relaxng_compile(rng_path):
    ''' Compile a RelaxNG file.

    :param rng_path: RelaxNG path
    :returns: a :class:`RelaxNG` instance built from ``rng_path``
    '''
    validator = RelaxNG(rng_path)
    return validator
class RelaxNG:
    ''' A RelaxNG schema compiled with lxml. '''

    def __init__(self, rng_path):
        ''' Parse and compile the RelaxNG schema found at ``rng_path``.

        :param rng_path: RelaxNG path
        '''
        from lxml import etree

        with io.open(rng_path, 'rb') as schema_file:
            schema_doc = etree.parse(schema_file)

        self.rng_path = rng_path
        self.etree_relaxng = etree.RelaxNG(schema_doc)

    @contextmanager
    def validating_output(self, output):
        ''' Context manager yielding a temporary file to write into.

        When the ``with`` body finishes, the temporary file is validated
        against the schema; only if it is valid is its content copied to
        ``output``. The temporary file is removed in every case.

        :param output: writable binary stream receiving validated content
        :raises ValidationFailed: if the written content is not valid
        '''
        handle, tmp_path = tempfile.mkstemp()
        try:
            with os.fdopen(handle, 'wb+') as scratch:
                yield scratch
                # Rewind so the validator reads from the beginning.
                scratch.seek(0)
                accepted = self.validate_stream(scratch)
                if not accepted:
                    raise ValidationFailed('RelaxNG')
                # Rewind again before copying to the real output.
                scratch.seek(0)
                shutil.copyfileobj(scratch, output)
        finally:
            # Best-effort cleanup: a failed unlink is logged, not raised.
            try:
                os.unlink(tmp_path)
            except Exception as exc:
                logger.warning('%s: can\'t unlink %s', exc, tmp_path)

    def validate(self, input):
        ''' Validate the XML file at path ``input``.

        :param input: input file path
        :returns: the validation result from :meth:`_validate`
        '''
        from lxml import etree

        with io.open(input, 'rb') as xml_file:
            parsed = etree.parse(xml_file)
        return self._validate(parsed)

    def validate_stream(self, input):
        ''' Validate an XML document read from an open stream.

        :param input: stream to parse the document from
        :returns: the validation result from :meth:`_validate`
        '''
        from lxml import etree

        return self._validate(etree.parse(input))

    def _validate(self, doc):
        ''' Validate a parsed document, logging start, errors and end.

        :param doc: parsed document to validate
        :returns: the result of the compiled schema's ``validate()``
        '''
        rng_name = os.path.basename(self.rng_path)
        logger.info('_lxml.relaxng(%s) start', rng_name)
        try:
            try:
                is_valid = self.etree_relaxng.validate(doc)
            except Exception as exc:
                # Log the failure, then let it propagate to the caller.
                logger.exception(exc)
                raise
            if not is_valid:
                for entry in self.etree_relaxng.error_log:
                    logger.error('%s', entry)
            return is_valid
        finally:
            logger.info('_lxml.relaxng(%s) end', rng_name)
def errlog_to_dict(error):
    ''' Convert an error log entry into a plain dict.

    :param error: object exposing ``message``, ``filename``, ``line``,
        ``column``, ``domain_name``, ``type_name`` and ``level_name``
    :returns: dict with keys ``message``, ``filename``, ``line``,
        ``column``, ``domain``, ``type`` and ``level``
    '''
    info = dict(message=error.message)
    # Where the error was reported.
    info.update(filename=error.filename,
                line=error.line,
                column=error.column)
    # How the error is classified.
    info.update(domain=error.domain_name,
                type=error.type_name,
                level=error.level_name)
    return info
|